library(readxl)
library(tibble)
library(ggplot2)
library(tidyverse)
#library(tm)
library(showtext)
library(lubridate)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(officer)
library(dplyr)
library(showtext)
library(tidyr)
library(knitr)
library(kableExtra)
library(cowplot)
library(colorspace)
library(ggrepel)
library(sf)
#library(tmap)    # for static and interactive maps
library(leaflet) # for interactive maps
#library(spData)
library(rnaturalearth)
library(leaflet.extras)
library(sp)
library(wbstats)
library(formattable)
library(rvest)
library(XML)
library(BBmisc)
library(xml2)
library(fmsb)
library(colormap)
library(circlize)
library(networkD3)
library(influential) #to create Sankey Diagram
library(igraph) #to create Sankey Diagram
library(oce) #to create Sankey Diagram
library(ggraph) #to create Sankey Diagram
library(devtools) #to add some external libraries
#library(addTextLabels)
library(openxlsx)
library(data.tree) #to create a hierarchy
library(htmlwidgets) #to save interative graphs
library(circlepackeR) #to plot circles # devtools::install_github("jeromefroe/circlepackeR")
library(ggradar) # devtools::install_github("ricardo-bion/ggradar")
library(geomtextpath)
library(ggflags) #devtools::install_github("jimjam-slam/ggflags")
library(geomtextpath)
library(ggridges)
library(ggtext)
library(packcircles)

# Register the Averta PE fonts with showtext.
# font_paths() expects the *directories* to search, not the path of a font
# file; font_add() then resolves the file names below against that path.
font_paths("/Users/theotimebourgeois/Library/Fonts")
# The plots in this report request the families "AvertaPE-Black" and
# "AvertaPE-Regular" (without ".otf"), so the fonts are registered under
# those names. The previous ".otf"-suffixed family names are kept as aliases.
font_add(family = "AvertaPE-Black", regular = "AvertaPE-Black.otf")
font_add(family = "AvertaPE-Black.otf", regular = "AvertaPE-Black.otf")
font_add(family = "AvertaPE-Regular", regular = "AvertaPE-Regular.otf")
font_add(family = "AvertaPE-Regular.otf", regular = "AvertaPE-Regular.otf")

# Base colours of the report (hex codes).
purple     <- "#2C2C54" # alternative that was considered: "#00051E"
pink       <- "#A40E4C"
blue       <- "#2E86AB"
yellow     <- "#FF9C00"
lila       <- "#E3DFFF"
brown      <- "#C3979F"
grey       <- "#BFBFBF"
grey_light <- "#F2F2F2"
white      <- "#FFFFFF"

# Ready-made colour vectors built from the base colours above.
mycols2 <- c(blue, purple)
mycols3 <- c(purple, pink, blue)
mycols4 <- c(purple, pink, blue, blue, yellow) # blue appears twice, as before
mycols5 <- c(white, blue, purple)
allcols <- c(purple, blue, pink, yellow, lila, brown, grey)


# Return the first `nbcol` colours of the report palette `allcols`.
#
# nbcol: number of colours wanted (non-negative integer).
# Returns a character vector of `nbcol` hex codes. Asking for more colours
# than `allcols` holds pads the result with NA, as plain indexing does.
#
# seq_len() is used instead of 1:nbcol because 1:0 is c(1, 0), which would
# return one colour when zero colours are requested.
mypal <- function(nbcol) {
  allcols[seq_len(nbcol)]
}

Introduction

Le cinéma ne dit pas autrement les choses, il dit autre chose.
The cinema does not say things differently, it says something else.

Éric Rohmer, French Director

First of all, the following analysis is purely subjective and is in no way representative of global consumption behaviour.
It is, however, representative of my own film consumption since I was 20 years old. The data has been meticulously collected, and the result is a snapshot of my cinephilia at a given moment, with the biases that this implies: I am a young Frenchman whose choice of films has been more or less influenced, and who obviously has tastes that cannot be explained but that can be identified.

Overview of my Database

# Start date of the film log; later code uses it to date undated viewings
# and to compute per-day averages.
Beginning <- as.Date("2019-09-12")

# Read one sheet of the ratings workbook and apply the shared clean-up:
#   * the "Batman" saga is folded into "DC";
#   * films distributed by "DreamWorks Animation" get the saga "DreamWorks";
#   * rows without a film title are dropped.
read_film_sheet <- function(sheet_name) {
  films <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx", sheet = sheet_name)

  is_batman <- films$Saga == "Batman"
  films$Saga[is_batman] <- "DC"

  is_dreamworks <- !is.na(films$`Maison de distribution`) &
    films$`Maison de distribution` == "DreamWorks Animation"
  films$Saga[is_dreamworks] <- "DreamWorks"

  filter(films, !is.na(`Titre du film`))
}

# Watch-list (films still to see)
ToSeeFilm <- read_film_sheet("Film à voir")

# Rated films
CritiqueFilm <- read_film_sheet("Notation")

# Films logged without a viewing date receive an artificial one, spread
# evenly between the start of the log (`Beginning`) and the earliest
# recorded viewing.

# Number of days between the start of the log and the first dated viewing
DateHole <- as.numeric(as.Date(min(CritiqueFilm$`Dernier visionnage`, na.rm = TRUE)) - Beginning)
undated <- is.na(CritiqueFilm$`Dernier visionnage`)
NbNA <- sum(undated)

# Guard: with no undated film the step DateHole / NbNA would be a division
# by zero. The offsets are the first NbNA terms of the sequence
# 1, 1 + step, 1 + 2 * step, ... so that exactly one value is produced per
# undated film (no recycling, no surplus element).
if (NbNA > 0) {
  CritiqueFilm$`Dernier visionnage`[undated] <-
    Beginning + round(1 + (seq_len(NbNA) - 1) * (DateHole / NbNA))
}

rm(DateHole, NbNA, undated)

# GlobalInfos <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",sheet = "Bilan")

# Stack the rated films and the watch-list into one table, keeping only
# rows that have a title.
NamesFilm <- bind_rows(CritiqueFilm, ToSeeFilm) %>%
  filter(!is.na(`Titre du film`))

# A film counts as seen when its "Scénario" grade is filled in.
NamesFilm$Seen <- !is.na(NamesFilm$Scénario)
# NamesFilm$Année <- as.numeric(format(NamesFilm$`Date de sortie`, format = "%Y"))
# NamesFilm$Décénie <- round(NamesFilm$Année/10,0)*10

# Where the cumulated grade is missing, fall back to twice the press rating.
no_cumulated_grade <- is.na(NamesFilm$`Notes cummulées`)
NamesFilm$`Notes cummulées`[no_cumulated_grade] <-
  NamesFilm$`Note Presse`[no_cumulated_grade] * 2
rm(no_cumulated_grade)

# Release month as a number (1-12)
NamesFilm$Mois <- as.numeric(format(NamesFilm$`Date de sortie`, "%m"))

# Non-French films that have no IMDB identifier yet
IMDB <- NamesFilm %>%
  filter(is.na(`IMDB ID`) & `Pays d'origine` != "France") %>%
  select(`English Title`, Année, Réalisateur, `IMDB ID`)
  

# Number of rated films
count_movies_seen <- nrow(CritiqueFilm)

# Number of films still to see
count_movies_tosee <- nrow(ToSeeFilm)

# Total number of films (rated + to see)
count_total <- count_movies_seen + count_movies_tosee


# Best director according to our ratings (minimum 3 films)
# Best director according to the press (minimum 3 films)
# Most underrated director (minimum 3 films)
#
# Per-director averages over the rated films. na.rm = TRUE keeps a single
# missing grade from turning a director's mean (and then every max() lookup
# below) into NA; the director ranking table further down in this report
# already averages with na.rm.
# NOTE(review): `Surcote` is the mean of `Différence`, presumably our rating
# minus the press rating - confirm against the workbook.
Director_table <- CritiqueFilm %>%
  group_by(Réalisateur) %>%
  summarise(
    Freq = n(),
    `Nos notes` = mean(`Nos notes`, na.rm = TRUE),
    `Note Presse` = mean(`Note Presse`, na.rm = TRUE),
    Surcote = mean(Différence, na.rm = TRUE)
  ) %>%
  filter(Freq >= 3)

# which.max() returns the first maximum and skips NA; the trailing [1] keeps
# the result a single (possibly NA) string when the table is empty.
Best_director_forme <- as.character(Director_table$Réalisateur[which.max(Director_table$`Nos notes`)])[1]
Best_director_forpresse <- as.character(Director_table$Réalisateur[which.max(Director_table$`Note Presse`)])[1]
Surcote_director <- as.character(Director_table$Réalisateur[which.max(Director_table$Surcote)])[1]


# Best actor (minimum 3 films)
# Second best actor (minimum 3 films)
# Third best actor (minimum 3 films)
# Most prolific actor

# Extra cast list from the "Acteurs" sheet, joined to the rated films by title.
ActeurExcel <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx", sheet = "Acteurs")
ActeurExcel <- ActeurExcel %>% select(Acteur, `Titre du film`) %>% merge(CritiqueFilm, by = "Titre du film")
ActeurExcel <- ActeurExcel %>% select(`Acteur.x`, `Titre du film`, `Nos notes`, Grade, `Note Presse`, `Emoji Pays`, `Date de sortie`)

# The three lead-actor columns of the rated films, one table per column.
Acteur1 <- select(CritiqueFilm, `Acteur 1`, `Titre du film`, `Nos notes`, Grade, `Note Presse`, `Emoji Pays`, `Date de sortie`)
Acteur2 <- select(CritiqueFilm, `Acteur 2`, `Titre du film`, `Nos notes`, Grade, `Note Presse`, `Emoji Pays`, `Date de sortie`)
Acteur3 <- select(CritiqueFilm, `Acteur 3`, `Titre du film`, `Nos notes`, Grade, `Note Presse`, `Emoji Pays`, `Date de sortie`)
colnames(ActeurExcel)[1] <- colnames(Acteur3)[1] <- colnames(Acteur2)[1] <- colnames(Acteur1)[1] <- "Acteur"

# One row per distinct (actor, film) pair; the title is only needed for
# de-duplication and is dropped afterwards.
Acteur_merge <- rbind(Acteur1, Acteur2, Acteur3, ActeurExcel) %>% unique() %>% select(-`Titre du film`)
rm(Acteur1, Acteur2, Acteur3, ActeurExcel)

# Per-actor averages.
#   * Empty actor slots (NA) are dropped so that they are not ranked as if
#     they were a person.
#   * The threshold is Freq >= 3 to match the "minimum 3 films" rule stated
#     above and used for directors (it used to be Freq > 3, i.e. 4 films).
#   * na.rm = TRUE keeps one missing grade from voiding an actor's average.
Acteur <- Acteur_merge %>%
  filter(!is.na(Acteur)) %>%
  group_by(Acteur) %>%
  summarise(
    Freq = n(),
    Notes = round(mean(`Nos notes`, na.rm = TRUE), 1),
    Presse = round(mean(`Note Presse`, na.rm = TRUE), 1)
  ) %>%
  mutate(Total = Notes + Presse) %>%
  filter(Freq >= 3) %>%
  arrange(desc(Total))

# Top three actors by combined score, and the actor with the most films
Best_actor <- as.character(Acteur$Acteur[1:3])
Acteur_Max <- (Acteur %>% arrange(desc(Freq)))[1, 1] %>% as.character()


# Best release year according to the rated films (years with at least 5 films)
Best_year <- CritiqueFilm %>%
  group_by(Année) %>%
  summarise(Notes = mean(`Nos notes`), Freq = n()) %>%
  filter(Freq >= 5) %>%
  arrange(desc(Notes)) %>%
  slice(1) %>%
  pull(Année) %>%
  as.numeric()

# Best month to go and see a film at the cinema in France:
# the month number with the highest average rating, turned into its name.
Best_Month <- month.name[
  CritiqueFilm %>%
    group_by(Mois) %>%
    summarise(Notes = mean(`Nos notes`)) %>%
    arrange(desc(Notes)) %>%
    slice(1) %>%
    pull(Mois) %>%
    as.numeric()
]


# Best distribution company according to the ratings

# Average rating per distributor, for distributors with more than 3 rated films
Distri_table <- CritiqueFilm %>%
  group_by(`Maison de distribution`) %>%
  summarise(
    Freq = n(),
    Note = round(mean(`Nos notes`, na.rm = TRUE), 1)
  ) %>%
  filter(Freq > 3)

# Every distributor that reaches the highest average (ties are all kept)
Best_distri <- as.character(
  Distri_table$`Maison de distribution`[Distri_table$Note == max(Distri_table$Note)]
)

# Parent company with the largest share of the rated films

Distri <- CritiqueFilm %>% select(`Maison mère`, `Nos notes`)
# Film count per parent company; the "France" entry is left out and the
# remaining companies are sorted from most to least films.
Distri_table <- as.data.frame(table(Distri$`Maison mère`))
Distri_table <- Distri_table[Distri_table$Var1 != "France", ]
Distri_table <- Distri_table[order(-Distri_table$Freq), ]

Most_Distri <- as.character(Distri_table$Var1[1])
# Share of the rated films that belong to that company, in percent
Most_Distri_Percent <- round(
  sum(CritiqueFilm$`Maison mère` == Most_Distri, na.rm = TRUE) * 100 / count_movies_seen,
  1
)

# Share of the whole list (seen + to see) already seen, in percent
Total_percent <- round(100 * count_movies_seen / (count_movies_tosee + count_movies_seen), 1)


# Format a duration given in minutes as "<hours>h<minutes>".
# The total is rounded to whole minutes first and the minutes are
# zero-padded, so 65 minutes gives "1h05" (not "1h5") and 119.6 minutes
# gives "2h00" (not "1h60").
format_duration_hm <- function(minutes) {
  total_minutes <- round(minutes)
  sprintf("%dh%02d", as.integer(total_minutes %/% 60), as.integer(total_minutes %% 60))
}

# Average length of a rated film, in minutes and as text
Duration <- mean(CritiqueFilm$Durée, na.rm = TRUE)
Duration_txt <- format_duration_hm(Duration)

# Days elapsed since the start of the log
Count_days <- as.numeric(Sys.Date() - Beginning)

# Average viewing time per day (minutes and text) and films per day
Duration_seen <- (Duration * count_movies_seen / Count_days)
Duration_txt_seen <- format_duration_hm(Duration_seen)
Films_per_day <- round(count_movies_seen / Count_days, 2)

# Rate at which new films are assumed to join the list.
# NOTE(review): this is the number of rated films released in 2018 or 2019
# divided by 360 - confirm that 360 is the intended number of days.
Filmtoaddparday <- sum(CritiqueFilm$Année == 2018 | CritiqueFilm$Année == 2019, na.rm = TRUE) / 360
# Days needed to watch the whole watch-list at the current daily viewing time
Nb_day <- count_movies_tosee * Duration / Duration_seen
# Same, inflated by the rate of newly added films
Nb_day2 <- round(Nb_day + Nb_day * Filmtoaddparday, 0)

# Turn both horizons into calendar dates
Nb_day <- Sys.Date() + Nb_day
Nb_day2 <- Sys.Date() + Nb_day2

# Subtitle reused by the plots
Sub <- paste0("Based on ", count_movies_seen, " movies seen")

As a lifelong film enthusiast, I created a database in September 2019 (1283 days ago) allowing me to track the films I watch and to structure my cinephilia.
I have seen 937 films over the last few years and still have a list of more than 1638 films to see, so this analysis is constantly evolving!
Who are my favourite directors? What are the best films according to me and according to the press? What kind of films are the most represented? Which actor is the most present in my filmography? All these questions will be answered in this report! I will start by giving you an overview of my film consumption and then go into more detail in the dedicated sections.

The recipe for a good film? Still unknown but if I had to summarize the 937 films I have seen, this is what I can say:

  • The best director according to our ratings (minimum 3 films) : James Cameron
  • The best director according to the press (minimum 3 films) : Dean DeBlois
  • The most underrated director (minimum 3 films) : Louis Leterrier
  • Best actor (minimum 3 films) : Zoe Saldana
  • Second best actor (minimum 3 films) : Viggo Mortensen
  • Third best actor (minimum 3 films) : Orlando Bloom
  • Most prolific actor: Hugh Jackman
  • The best year according to the rated films : 1994
  • The best month to see a film in France: May
  • The best distribution company according to the scores : Pixar
  • Distribution company with the highest market share : Disney (25.6%)
  • Percentage of advancement : 36.4%
  • Average duration of a film : 1h50
  • Daily time spent watching films : 1h21 or 0.73 film per day
  • End date if no film is added to my list again : 2029-05-07
  • End date if I add films at the same rate as today : 2031-06-13

Explanation of my scoring system


In order to establish a ranking of films, actors, directors… I had to decide on some rating criteria that will allow me to evaluate the main elements that make up a film. So here are the 5 criteria I rate out of 5:

# Pool of films used as examples in the text: grade "A", press rating of at
# least 4, saga flag equal to "Saga", an English title, and not French.
# filter() drops rows where a condition is NA; logical subsetting with `[`
# would turn them into all-NA rows that could then be sampled as "NA".
Explication_Sample <- CritiqueFilm %>%
  filter(
    Grade == "A",
    `Note Presse` >= 4,
    Saga == "Saga",
    !is.na(`English Title`),
    `Pays d'origine` != "France"
  ) %>%
  select(`English Title`, Scénario, `Acteurs / Personnages`, `Ambiance / Concept`, `Aspect Visuel`, `Aspect Sonore`) %>%
  as.data.frame()

# Draw three titles rated 5 on `criterion` (a column name) from `pool`.
# which() ignores films whose grade for that criterion is missing.
pick_top_rated <- function(pool, criterion) {
  sample(pool$`English Title`[which(pool[[criterion]] == 5)], 3)
}

# Turn three titles into the sentence fragment "A, B or C".
format_examples <- function(titles) {
  paste0(titles[1], ", ", titles[2], " or ", titles[3])
}

# For each criterion: draw three titles, remove them from the pool so that
# no film is quoted twice, and only then collapse them into text. Collapsing
# first (as the scenario step used to do) makes the %in% test compare titles
# with the whole sentence, so nothing is removed.
picked_titles <- pick_top_rated(Explication_Sample, "Scénario")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Scenario <- format_examples(picked_titles)

picked_titles <- pick_top_rated(Explication_Sample, "Acteurs / Personnages")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Acteur <- format_examples(picked_titles)

picked_titles <- pick_top_rated(Explication_Sample, "Ambiance / Concept")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Ambiance <- format_examples(picked_titles)

picked_titles <- pick_top_rated(Explication_Sample, "Aspect Visuel")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Visuel <- format_examples(picked_titles)

picked_titles <- pick_top_rated(Explication_Sample, "Aspect Sonore")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Sonore <- format_examples(picked_titles)

rm(picked_titles)

# Average grade of each rating criterion over all rated films, rounded to
# two decimals, with an English label per criterion.
scoring_system <- data.frame(
  Categories = c(
    "Scenario",
    "Actors and characters",
    "Atmosphere and concept",
    "Visual aspect",
    "Sound aspect"
  ),
  Grade = round(
    c(
      mean(CritiqueFilm$Scénario, na.rm = TRUE),
      mean(CritiqueFilm$`Acteurs / Personnages`, na.rm = TRUE),
      mean(CritiqueFilm$`Ambiance / Concept`, na.rm = TRUE),
      mean(CritiqueFilm$`Aspect Visuel`, na.rm = TRUE),
      mean(CritiqueFilm$`Aspect Sonore`, na.rm = TRUE)
    ),
    2
  ),
  stringsAsFactors = FALSE
)

# Long table of the five criterion grades: one row per (film, criterion),
# with the criterion name in `text` and the grade, rounded to a whole
# number, in `value`. `text` is turned into a factor ordered by grade.
CategoryDB <- CritiqueFilm %>%
  select(Scénario, `Acteurs / Personnages`, `Ambiance / Concept`, `Aspect Visuel`, `Aspect Sonore`) %>%
  pivot_longer(cols = everything(), names_to = "text", values_to = "value") %>%
  as.data.frame() %>%
  mutate(
    value = round(value, 0),
    text = fct_reorder(text, value)
  )

# Colour ramp from blue to purple
colfunc2 <- colorRampPalette(c(blue, purple))

# Mean (rounded) grade per criterion
CategoryDBMean <- summarise(group_by(CategoryDB, text), value = mean(value))

# Ridge plot of the grade distribution per criterion, with the mean marked
# on each ridge.
ggplot(CategoryDB, aes(x = value, y = text, fill = text)) +
  # Baseline of each ridge. Drawn from the per-criterion summary so that one
  # segment is drawn per criterion instead of one per data row.
  geom_segment(data = CategoryDBMean, aes(x = 0, xend = 5, yend = text, col = text)) +
  # quantile_fun is replaced by the mean so that the "quantile" line marks it
  geom_density_ridges(scale = 1.1, bandwidth = 0.4, quantile_fun = function(value, ...) mean(value), quantile_lines = TRUE, col = white) +
  # Mean value next to the line
  geom_text(data = CategoryDBMean, aes(x = value, y = text, label = value %>% round(2)), col = white, family = "AvertaPE-Black", hjust = -.5, vjust = -0.5) +
  # Criterion name at the left edge; again one label per criterion rather
  # than one (overplotted) label per data row
  geom_text(data = CategoryDBMean, aes(x = 0.02, y = text, label = text, col = text), family = "AvertaPE-Black", hjust = 0, vjust = -.3) +
  # The subtitle is the `Sub` variable ("Based on <n> movies seen"); it used
  # to be the literal text "Sub".
  labs(title = "Average of grades per Categorie",
       subtitle = Sub,
       y = "Category", x = "Grade") +
  scale_x_continuous(limits = c(0, 5), expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_fill_manual(values = colfunc2(5)) +
  scale_color_manual(values = colfunc2(5)) +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        axis.text.y = element_blank(),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))

  • The scenario: Essential for a good film, it keeps us on the edge of our seats, makes us passionate, questions us and is in my opinion the most important. A film with an impeccable visual quality without a script will remain a bad film. Here are for example 3 films that I evaluated with an excellent script: The Green Mile, Inception or The Prestige

  • Actors and characters: This category is an indissociable part of the rating system and allows us to identify whether the casting is successful and therefore whether the actors are good and correspond perfectly to the character they play. This category is obviously rated higher than the others since the actors contribute most to the credibility of a film and most of the time give their best as in : Titanic, The King’s Speech or The Green Mile

  • Atmosphere and concept: Each film has its own universe that can transport us and sometimes we want to see more… or not! The atmosphere of the film allows us to stay hooked to the plot and to feel unique emotions. The concept allows innovation in an environment that we think is already saturated but we will see that many recent films have really new concepts like : Forgotten Silver, Gone Girl or Life of Pi

  • Visual aspect: The aesthetics of the film is a central element. The visual aspect consists in evaluating the visual beauty of the film, its risk-taking, its camera movements, its editing, its special effects, its photography etc. Here are some films with an interesting visual aspect: Sully, The Shining or Ready Player One

  • Sound aspect: Finally, the sound aspect echoes the atmosphere of the film as it includes both the soundtrack and all the work done on sound, sound effects etc. to make it all coherent. Although the soundtrack has a central place in the evaluation of this criterion, some films enjoy quite incredible sound effects that sometimes absorb the musical theme. Here are 3 films with impeccable sound effects: Hacksaw Ridge, Don’t Look Up or Interstellar

Top of my movies

# Ranking of the best-graded films among those rated by "Théotime".
Top <- 100

# filter() drops films whose "Noté par" is missing (logical subsetting with
# `[` would add all-NA rows for them); films are then sorted by cumulated
# grade, best first, and the first `Top` are kept.
TopFilms <- CritiqueFilm %>%
  filter(str_detect(`Noté par`, "Théotime")) %>%
  select(
    Title = `English Title`,
    Year = Année,
    Country = `Emoji Pays`,
    Director = Réalisateur,
    Grade = `Notes cummulées`
  ) %>%
  arrange(desc(Grade)) %>%
  head(Top)

TopFilms$Country[TopFilms$Country == "United States of America"] <- "USA"
# Rank follows the number of rows actually kept, so the table still builds
# when fewer than `Top` films are available.
TopFilms$Rank <- seq_len(nrow(TopFilms))
TopFilms <- TopFilms %>% select(Rank, everything())

# Scrollable HTML table; the Grade column is shaded from blue to purple.
TopFilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("c", "l", "c", "c", "l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(6, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
Rank Title Year Country Director Grade
1 Forrest Gump 1994 🇺🇸 Robert Zemeckis 9.6
2 The Dark Knight 2008 🇺🇸 Christopher Nolan 9.5
3 The Lion King 1994 🇺🇸 Roger Allers 9.5
4 Joker 2019 🇺🇸 Todd Philips 9.5
5 The Green Mile 2000 🇺🇸 Frank Darabont 9.5
6 The Lord of the Rings: The Fellowship of the Ring 2001 🇺🇸 Peter Jackson 9.5
7 The Lord of the Rings: The Two Towers 2002 🇺🇸 Peter Jackson 9.5
8 The Lord of the Rings: The Return of the King 2003 🇺🇸 Peter Jackson 9.5
9 Pulp Fiction 1994 🇺🇸 Quentin Tarantino 9.5
10 1917 2020 🇬🇧 Sam Mendes 9.4
11 Bohemian Rhapsody 2018 🇺🇸 Bryan Singer 9.4
12 Dune 2021 🇺🇸 Denis Villeneuve 9.4
13 Interstellar 2014 🇺🇸 Christopher Nolan 9.4
14 Spider-Man: Into the Spider-Verse 2018 🇺🇸 Peter Ramsey 9.4
15 Avatar 2009 🇺🇸 James Cameron 9.3
16 Avatar 2 2022 🇺🇸 James Cameron 9.3
17 Zack Snyder’s Justice League 2021 🇺🇸 Zack Snyder 9.3
18 Soul 2020 🇺🇸 Pete Docter 9.3
19 Django Unchained 2013 🇺🇸 Quentin Tarantino 9.3
20 Kingsman: The Secret Service 2015 🇺🇸 Matthew Vaughn 9.3
21 Léon: The Professional 1994 🇫🇷 Luc Besson 9.3
22 Slumdog Millionaire 2009 🇬🇧 Danny Boyle 9.3
23 The Truman Show 1998 🇺🇸 Peter Weir 9.3
24 Titanic 1998 🇺🇸 James Cameron 9.3
25 Toy Story 3 2010 🇺🇸 Lee Unkrich 9.2
26 How to Train Your Dragon 2010 🇺🇸 Dean DeBlois 9.2
27 Inception 2010 🇺🇸 Christopher Nolan 9.2
28 Jurassic Park 1993 🇺🇸 Steven Spielberg 9.2
29 Guardians of the Galaxy 2014 🇺🇸 James Gunn 9.2
30 Back to the Future 1985 🇺🇸 Robert Zemeckis 9.2
31 Star Wars: Episode III – Revenge of the Sith 2005 🇺🇸 George Lucas 9.2
32 Star Wars : Episode V – The Empire Strikes Back 1980 🇺🇸 Irvin Kershner 9.2
33 Star Wars: Episode VI – Return of the Jedi 1983 🇺🇸 Richard Marquand 9.2
34 Hacksaw Ridge 2016 🇺🇸 Mel Gibson 9.2
35 Coco 2017 🇺🇸 Lee Unkrich 9.1
36 Green Book 2018 🇺🇸 Peter Farrelly 9.1
37 Harry Potter and the Deathly Hallows: Part 2 2011 🇺🇸 David Yates 9.1
38 Skyfall 2012 🇬🇧 Sam Mendes 9.1
39 The Curious Case of Benjamin Button 2009 🇺🇸 David Fincher 9.1
40 Rise of the Planet of the Apes 2011 🇺🇸 Rupert Wyatt 9.1
41 The Pianist 2002 🇫🇷 Roman Polanski 9.1
42 The Shawshank Redemption 1995 🇺🇸 Frank Darabont 9.1
43 Guardians of the Galaxy Vol. 2 2017 🇺🇸 James Gunn 9.1
44 Avengers: Infinity War 2018 🇺🇸 Frères Russo 9.1
45 Spider-Man: No Way Home 2021 🇺🇸 Jon Watts 9.1
46 Parasite 2019 🇰🇷 Bong Joon-ho 9.1
47 Rogue One: A Star Wars Story 2016 🇺🇸 Gareth Edwards 9.1
48 The Incredibles 2004 🇺🇸 Brad Bird 9.0
49 Incredibles 2 2018 🇺🇸 Brad Bird 9.0
50 The Great Gatsby 2013 🇺🇸 Baz Luhrmann 9.0
51 Casino Royale 2006 🇬🇧 Martin Campbell 9.0
52 Kick-Ass 2010 🇺🇸 Matthew Vaughn 9.0
53 Life of Pi 2012 🇺🇸 Ang Lee 9.0
54 Ford v Ferrari 2019 🇺🇸 James Mangold 9.0
55 Marvel’s The Avengers 2012 🇺🇸 Joss Whedon 9.0
56 Avengers: Endgame 2019 🇺🇸 Frères Russo 9.0
57 Pirates of the Caribbean: The Curse of the Black Pearl 2003 🇺🇸 Gore Verbinski 9.0
58 Reservoir Dogs 1992 🇺🇸 Quentin Tarantino 9.0
59 Spider-Man 2002 🇺🇸 Sam Raimi 9.0
60 X-Men: Days of Future Past 2014 🇺🇸 Bryan Singer 9.0
61 Your Name. 2016 🇯🇵 Makoto Shinkai 9.0
62 Batman Begins 2005 🇺🇸 Christopher Nolan 9.0
63 War for the Planet of the Apes 2017 🇺🇸 Matt Reeves 8.9
64 Limitless 2011 🇺🇸 Neil Burger 8.9
65 Sherlock Holmes 2010 🇺🇸 Guy Ritchie 8.9
66 Star Trek Into Darkness 2013 🇺🇸 J. J. Abrams 8.9
67 Knives Out 2019 🇺🇸 Rian Johnson 8.9
68 Aladdin 1992 🇺🇸 John Musker et Ron Clements 8.9
69 Toy Story 1996 🇺🇸 John Lasseter 8.9
70 WALL‐E 2008 🇺🇸 Andrew Stanton 8.9
71 The Prestige 2006 🇺🇸 Christopher Nolan 8.9
72 Charlie and the Chocolate Factory 2005 🇺🇸 Tim Burton 8.8
73 Monsters, Inc.  2002 🇺🇸 Pete Docter 8.8
74 Zootopia 2016 🇺🇸 Byron Howard 8.8
75 How to Train Your Dragon 2 2014 🇺🇸 Dean DeBlois 8.8
76 Gladiator 2000 🇺🇸 Ridley Scott 8.8
77 Harry Potter and the Deathly Hallows: Part 1 2010 🇺🇸 David Yates 8.8
78 No Time to Die 2021 🇬🇧 Cary Joji Fukunaga 8.8
79 Kingsman: The Golden Circle 2017 🇺🇸 Matthew Vaughn 8.8
80 Spirited Away 2002 🇯🇵 Hayao Miyazaki 8.8
81 Mad Max: Fury Road 2015 🇦🇺 George Miller 8.8
82 Pirates of the Caribbean: Dead Man’s Chest 2006 🇺🇸 Gore Verbinski 8.8
83 Back to the Future Part II 1989 🇺🇸 Robert Zemeckis 8.8
84 Shrek 2001 🇺🇸 Andrew Adamson 8.8
85 Spider-Man 2 2004 🇺🇸 Sam Raimi 8.8
86 Star Wars: Episode II – Attack of the Clones 2002 🇺🇸 George Lucas 8.8
87 Star Wars: Episode IV – A New Hope 1977 🇺🇸 George Lucas 8.8
88 The Grand Budapest Hotel 2014 🇺🇸 Wes Anderson 8.8
89 Whiplash 2014 🇺🇸 Damien Chazelle 8.8
90 Ratatouille 2007 🇺🇸 Brad Bird 8.7
91 Wreck‐It Ralph 2012 🇺🇸 Rich Moore 8.7
92 How to Train Your Dragon: The Hidden World 2019 🇺🇸 Dean DeBlois 8.7
93 Dunkirk 2017 🇺🇸 Christopher Nolan 8.7
94 Cyrano, My Love 2019 🇫🇷 Alexis Michalik 8.7
95 Gone Girl 2014 🇺🇸 David Fincher 8.7
96 The Imitation Game 2014 🇺🇸 Morten Tyldum 8.7
97 Klaus 2019 🇪🇸 Sergio Pablos 8.7
98 Dawn of the Planet of the Apes 2014 🇺🇸 Matt Reeves 8.7
99 The Lion King 2019 🇺🇸 Jon Favreau 8.7
100 The Matrix 1999 🇺🇸 Les Wachowski 8.7

My consumption over time

# First and last release year among the rated films
YearMin <- min(CritiqueFilm$Année, na.rm = TRUE)
YearMax <- max(CritiqueFilm$Année, na.rm = TRUE)

# One row per year in that range (the column is named YearMin.YearMax by
# data.frame()), with the number of films released that year in each table.
Year_df <- data.frame(YearMin:YearMax)

# Count the rows of `films` whose release year equals `year`
count_released_in <- function(year, films) {
  sum(as.numeric(films$Année == year), na.rm = TRUE)
}

Year_df$CritiqueFilm <- vapply(Year_df$YearMin.YearMax, count_released_in, numeric(1), films = CritiqueFilm)
Year_df$NamesFilm <- vapply(Year_df$YearMin.YearMax, count_released_in, numeric(1), films = NamesFilm)
Year_df$Total <- Year_df$CritiqueFilm + Year_df$NamesFilm
Year_df <- as.data.frame(Year_df)

# Release-year window shown on the plot
YearMin_graph <- 1998
YearMax_graph <- 2023

# Release year and our rating of every rated film inside that window
Year_Grade <- CritiqueFilm %>% select(Année, `Nos notes`)
Year_Grade <- Year_Grade[
  Year_Grade$Année >= YearMin_graph & Year_Grade$Année <= YearMax_graph,
]

# Mean rating of the 2019 releases and lowest rating in the window
Year_Grade_2019 <- round(mean(Year_Grade$`Nos notes`[Year_Grade$Année == 2019], na.rm = TRUE), 1)
Year_Grade_Min <- round(min(Year_Grade$`Nos notes`, na.rm = TRUE), 1)

# 2-D histogram (one year wide, a third of a grade high) with a linear trend
Year_Grade_graph <- ggplot(Year_Grade, aes(Année, `Nos notes`)) +
  geom_bin2d(binwidth = c(1, 1 / 3)) +
  geom_smooth(method = lm, col = white, se = FALSE) +
  scale_x_continuous(breaks = seq(YearMin_graph, YearMax_graph, 2)) +
  scale_fill_gradient(low = purple, high = blue) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    y = "Grade",
    x = "Year",
    fill = "Count"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Year_Grade_graph


This graph represents my film consumption since 1998, the year I was born. The lighter the colour, the more films I have seen with that rating in that period.
Since 2019, when I created my database, there is a greater diversity of bad and good films, with ratings that tend towards the average overall, whereas the years before the creation of my file have higher average scores because they correspond to good films that “must” be seen.
Eventually, the aim will be to see more films over this period to complete each square from 1 to 5 in score and see a real trend that I imagine is decreasing.

# Films per decade, split by whether they have been seen.
# table() puts the two values of `Seen` (FALSE, TRUE) in columns, which are
# renamed "To see" and "Seen"; the decades are the row names.
Decades <- as.data.frame.matrix(table(select(NamesFilm, Décénie, Seen)))
colnames(Decades) <- c("To see", "Seen")
Decades$Decades <- as.numeric(rownames(Decades))
Decades$Total <- as.numeric(Decades$`To see` + Decades$Seen)

# Stacked areas: total list in the background, films seen in front, with the
# number of films seen printed on each decade.
# geom_label() has no `check_overlap` parameter (only geom_text() does), so
# it is no longer passed.
Decades_graph <- ggplot(Decades) +
  geom_area(aes(x = Decades, y = Total, fill = "Movies to see")) +
  geom_area(aes(x = Decades, y = Seen, fill = "Movies seen")) +
  geom_label(aes(x = Decades, y = Seen, label = paste(Seen)),
             fill = purple,
             colour = white) +
  scale_fill_manual(values = c(purple, blue)) +
  scale_x_continuous(breaks = seq(1930, 2020, 10)) +
  labs(title = "Volume of films to be seen and films seen\naccording to recommendations",
       y = "Number of films", x = "Decade",
       fill = "Legend") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "bottom",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Decades_graph


This graph represents my film consumption since 1998, the year I was born. The lighter the colour, the more films I have seen with that rating in that period.
Since 2019, when I created my database, there is a greater diversity of bad and good films, with ratings that tend towards the average overall, whereas the years before the creation of my file have higher average scores because they correspond to good films that “must” be seen.
Eventually, the aim will be to see more films over this period to complete each square from 1 to 5 in score and see a real trend that I imagine is decreasing.

# From here on `Différence` holds the absolute gap between the two ratings.
CritiqueFilm$Différence <- abs(CritiqueFilm$Différence)

# Films whose gap exceeds 1.4 points; these are labelled on the plot
TopDiff <- filter(CritiqueFilm, Différence > 1.4)

# Density of (press rating, our rating) with a linear trend, the y = x
# reference line, and the most divergent films highlighted.
ggplot(CritiqueFilm, aes(`Note Presse`, `Nos notes`)) +
  # after_stat(level) replaces the deprecated ..level.. notation
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon") +
  geom_smooth(col = pink, se = FALSE, method = "lm") +
  geom_abline(intercept = 0, color = grey) +
  geom_point(data = TopDiff, aes(`Note Presse`, `Nos notes`), color = purple, size = 2) +
  # The label is mapped by column name: referring to TopDiff$... inside
  # aes() bypasses the layer's own data.
  geom_label_repel(data = TopDiff, aes(label = `English Title`),
                   vjust = 0.1, hjust = 0.1,
                   family = "AvertaPE-Regular",
                   size = 7 / .pt) +
  scale_fill_gradient(low = purple, high = blue) +
  xlim(1.5, 5.4) +
  ylim(1.5, 5.4) +
  labs(title = "Rating of the film compared to the press ratings",
       subtitle = "Trend of overnotting") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))



My consumption

# Viewing dates of the last two years, with calendar fields and a flag for
# the most recent 365 days.
# The two-year cut-off is compared Date-to-Date through as.Date():
# NOTE(review): the column comes from read_excel() and is presumably a
# date-time (POSIXct); comparing that directly with a Date mixes classes
# and does not compare calendar dates - confirm the column type.
Date_Evolution <- CritiqueFilm %>%
  select(Date = `Dernier visionnage`) %>%
  filter(!is.na(Date),
         as.Date(Date) >= Sys.Date() %m-% years(2)) %>%
  arrange(Date) %>%
  mutate(Week = format(Date, format = "%V") %>% as.numeric(),
         Day = format(Date, format = "%d") %>% as.numeric(),
         Month = format(Date, format = "%m") %>% as.numeric(),
         LastYear = Date >= (Sys.time() - (365 * 24 * 60 * 60)))

# Shift the ISO week numbers relative to the current week, then wrap values
# above 52 back into the 1-52 range.
Date_Evolution$Week <- Date_Evolution$Week + 53 - (format(Sys.time(), "%V") %>% as.numeric())
Date_Evolution$Week[Date_Evolution$Week > 52] <- Date_Evolution$Week[Date_Evolution$Week > 52] - 52
Date_Evolution$Week <- round(Date_Evolution$Week, 0)

# Number of films per (shifted week, LastYear) combination, including the
# combinations with zero films.
# as.data.frame() on a table returns `Week` as a factor; it is converted
# through as.character() because as.numeric() on a factor returns the level
# codes, which differ from the week numbers as soon as a week is missing.
Date_Evolution_table <- Date_Evolution %>%
  select(Week, LastYear) %>%
  table() %>%
  as.data.frame() %>%
  mutate(Week = as.numeric(as.character(Week)))

# Smoothed weekly film count, one curve per value of `LastYear`, on a dark
# background; week 52 is marked and labelled with its counts.
ggplot(Date_Evolution_table, aes(x = Week, y = Freq, group = LastYear, color = LastYear)) +
  geom_text(data = (Date_Evolution_table %>% filter(Week == 52)),
            aes(x = 52, y = Freq, label = paste(Freq, "movies")), size = 3.5, hjust = 1, vjust = 0, family = "AvertaPE-Black",
            key_glyph = "smooth") +
  geom_vline(xintercept = 52, col = pink, size = 1.1) +
  geom_smooth(method = lm, formula = y ~ splines::bs(x, 7), se = FALSE) +
  labs(title = "Identify a decrease and gaps in my consumption",
       subtitle = "Film consumption over a year by week",
       color = "Timeline",
       x = "Week", y = "Count") +
  # `labels` is spelled out in full (it was passed as `label`, which only
  # worked through partial argument matching)
  scale_color_manual(values = c(white, yellow), labels = c("Global", "This year")) +
  scale_x_continuous(breaks = seq(0, 52, by = 2)) +
  # The obsolete panel.margin.x / panel.margin.y entries (both set to NULL,
  # i.e. without effect) are no longer passed.
  theme(text = element_text(size = 12, family = "AvertaPE-Regular", colour = white),
        title = element_text(colour = white),
        panel.background = element_rect(fill = purple),
        plot.background = element_rect(fill = purple, color = purple),
        panel.grid.major = element_line(colour = purple),
        panel.grid.minor = element_line(colour = purple),
        panel.border = element_blank(),
        legend.text = element_text(colour = white),
        legend.title = element_text(colour = white),
        legend.position = "right",
        legend.background = element_blank(),
        legend.key = element_blank(),
        axis.text = element_text(colour = white),
        #axis.text.x = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = white),
        plot.caption = element_text(size = 10, color = blue))

Among the annual objectives, film consumption is central and must be more or less stable to achieve them. These curves allow us to identify the periodicity of this consumption according to the weeks on a sliding year with the current month on the right. Keeping the current year’s curve above the overall curve may be a priority to complete my cinephilia and achieve my goals. This filmography is a race against time and can be optimised by segmenting the films to see. Each must-see film is scored from 0 to 100% where 100 is the highest level of recommendation. Few are above 90% and can be considered a priority. The Academy Awards can also be an indicator of “quality” but more importantly of visibility, highlighting a variety of films although this selection is heavily influenced. Despite this sectorisation, the list of films to be seen is getting longer as well as shorter, but with a constant viewing frequency of one film per day, the list should be completed.

# Four headline figures with their captions. `Data` ends up as text because
# the third figure carries an "h" suffix.
Table_Duration <- data.frame(
  Data = c(
    # films whose recommendation score is above 0.9
    NamesFilm %>% filter(Reco > 0.9) %>% nrow(),
    # films whose source mentions "#Oscar"
    NamesFilm %>% filter(str_detect(Source, "#Oscar")) %>% nrow(),
    # total length of the unseen films, in hours
    paste0(round(sum(NamesFilm$Durée[NamesFilm$Seen == FALSE], na.rm = TRUE) / 60, 0), "h"),
    # films seen during the last month; the viewing date is compared
    # Date-to-Date through as.Date()
    # NOTE(review): the column is presumably a date-time (POSIXct) read by
    # read_excel(); comparing it directly with a Date mixes classes - confirm.
    NamesFilm %>%
      filter(Seen == TRUE, as.Date(`Dernier visionnage`) > (Sys.Date() %m-% months(1))) %>%
      nrow()
  ),
  Caption = c(
    "movies with a recommendation higher than 90%",
    "Academy Awards nominated films on my must-see list",
    "cumulative duration of the films to be seen",
    "films seen this past month"
  ),
  stringsAsFactors = FALSE
)

# Transposed so that the figures form the first row and the captions the
# second one.
Table_Duration %>%
  t() %>%
  as.data.frame() %>%
  kable(escape = FALSE, align = rep("c", 10), col.names = NULL, row.names = FALSE, booktabs = TRUE) %>%
  kable_styling(full_width = TRUE) %>%
  column_spec(1:4, width = "30em") %>%
  row_spec(1, bold = TRUE, color = yellow, font_size = 30) %>%
  row_spec(2, bold = TRUE, color = white)
5 268 2894h 20
movies with a recommendation higher than 90% Academy Awards nominated films on my must-see list cumulative duration of the films to be seen films seen this past month


Directors

# Ranking of directors with at least 3 rated films.
DirectorTop <- 40
DirectorHead <- as.data.frame(table(CritiqueFilm$Réalisateur))
DirectorHead <- DirectorHead[order(-DirectorHead$Freq), ]
DirectorHead <- DirectorHead[DirectorHead$Freq >= 3, ]
#DirectorHead <- head(DirectorList,DirectorTop)
colnames(DirectorHead) <- c("Director", "Freq")
DirectorHead <- as.data.frame(DirectorHead)
DirectorHead$Director <- as.character(DirectorHead$Director)

# Mean of `column` over one director's films, rounded to one decimal
director_mean <- function(director, column) {
  round(mean(CritiqueFilm[[column]][which(CritiqueFilm$Réalisateur == director)], na.rm = TRUE), 1)
}

# Distinct country flags of one director's films, pasted together.
# Missing flags are dropped explicitly; this replaces eleven hard-coded
# slots followed by removal of the text "NA", which capped the number of
# flags at eleven.
director_flags <- function(director) {
  flags <- unique(CritiqueFilm$`Emoji Pays`[which(CritiqueFilm$Réalisateur == director)])
  paste0(flags[!is.na(flags)], collapse = "")
}

# vapply() over the directors replaces the 1:nrow() loop, which misbehaves
# when no director qualifies (1:0 is c(1, 0)).
DirectorHead$Presse <- vapply(DirectorHead$Director, director_mean, numeric(1), column = "Note Presse", USE.NAMES = FALSE)
DirectorHead$OurGrades <- vapply(DirectorHead$Director, director_mean, numeric(1), column = "Nos notes", USE.NAMES = FALSE)
DirectorHead$Countries <- vapply(DirectorHead$Director, director_flags, character(1), USE.NAMES = FALSE)

# Gap and sum of the two (rounded) averages; directors are ranked by the sum
DirectorHead$Diff <- DirectorHead$OurGrades - DirectorHead$Presse
DirectorHead$Total <- DirectorHead$Presse + DirectorHead$OurGrades
DirectorHead <- DirectorHead[order(-DirectorHead$Total), ]
DirectorHead$Rank <- rownames(DirectorHead) <- seq_len(nrow(DirectorHead))

DirectorHead <- select(DirectorHead, Rank, Director, Countries, Freq, OurGrades, Presse, Diff, Total)

# Top 50 directors as a scrollable HTML table. The Total column is turned
# into colour tiles over all directors before the table is cut to 50 rows.
DirectorHead %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(50) %>%
  kable(escape = FALSE, align = c("l", "l", rep("c", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(8, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
Rank Director Countries Freq OurGrades Presse Diff Total
1 James Cameron 🇺🇸 3 5.0 4.3 0.7 9.3
2 Christopher Nolan 🇺🇸 8 4.8 4.2 0.6 9.0
3 Peter Jackson 🇳🇿🇺🇸 6 4.7 4.2 0.5 8.9
4 Brad Bird 🇺🇸 3 4.7 4.2 0.5 8.9
5 George Lucas 🇺🇸 4 4.8 4.1 0.7 8.9
6 Dean DeBlois 🇺🇸 3 4.6 4.3 0.3 8.9
7 Matthew Vaughn 🇺🇸 5 4.8 4.0 0.8 8.8
8 Pete Docter 🇺🇸 4 4.5 4.3 0.2 8.8
9 Gore Verbinski 🇺🇸 3 4.8 4.0 0.8 8.8
10 Matt Reeves 🇺🇸 3 4.8 4.0 0.8 8.8
11 Sam Mendes 🇬🇧 3 4.8 4.0 0.8 8.8
12 Quentin Tarantino 🇺🇸 6 4.5 4.2 0.3 8.7
13 Andrew Stanton 🇺🇸 3 4.5 4.2 0.3 8.7
14 Martin Scorsese 🇺🇸 4 4.4 4.2 0.2 8.6
15 Sam Raimi 🇺🇸 4 4.8 3.7 1.1 8.5
16 Guy Ritchie 🇺🇸 5 4.5 3.9 0.6 8.4
17 Bong Joon-ho 🇰🇷 3 4.4 4.0 0.4 8.4
18 David Yates 🇺🇸 7 4.6 3.8 0.8 8.4
19 Frères Russo 🇺🇸 4 4.3 4.1 0.2 8.4
20 J. J. Abrams 🇺🇸 4 4.6 3.7 0.9 8.3
21 Bryan Singer 🇺🇸 6 4.3 3.9 0.4 8.2
22 David Fincher 🇺🇸 4 4.2 4.0 0.2 8.2
23 Wes Anderson 🇺🇸 4 4.3 3.9 0.4 8.2
24 Hayao Miyazaki 🇯🇵 8 3.9 4.2 -0.3 8.1
25 Steven Spielberg 🇺🇸 6 4.2 3.9 0.3 8.1
26 John Lasseter 🇺🇸 5 4.2 3.9 0.3 8.1
27 Clint Eastwood 🇺🇸 3 4.0 4.1 -0.1 8.1
28 Jon Watts 🇺🇸 3 4.2 3.9 0.3 8.1
29 James Mangold 🇺🇸 5 4.1 3.9 0.2 8.0
30 Zack Snyder 🇺🇸 7 4.3 3.6 0.7 7.9
31 Ridley Scott 🇺🇸 6 3.9 4.0 -0.1 7.9
32 Rian Johnson 🇺🇸 4 4.3 3.6 0.7 7.9
33 George Miller 🇺🇸🇦🇺 3 4.4 3.5 0.9 7.9
34 Robert Zemeckis 🇺🇸 8 4.1 3.8 0.3 7.9
35 Gary Trousdale et Kirk Wise 🇺🇸 3 4.1 3.8 0.3 7.9
36 Ang Lee 🇺🇸 3 4.2 3.6 0.6 7.8
37 Chad Stahelski 🇺🇸 3 4.2 3.6 0.6 7.8
38 Marc Webb 🇺🇸 3 4.1 3.7 0.4 7.8
39 Tim Burton 🇺🇸 9 3.9 3.8 0.1 7.7
40 Andrew Adamson 🇺🇸 4 4.1 3.6 0.5 7.7
41 Francis Lawrence 🇺🇸 5 4.0 3.6 0.4 7.6
42 James Gunn 🇺🇸 5 3.9 3.7 0.2 7.6
43 John Musker et Ron Clements 🇺🇸 5 3.7 3.9 -0.2 7.6
44 Carlos Saldanha 🇺🇸 4 3.8 3.8 0.0 7.6
45 Danny Boyle 🇬🇧🇺🇸 4 4.0 3.6 0.4 7.6
46 Guillermo del Toro 🇺🇸 3 4.1 3.5 0.6 7.6
47 Pierre Coffin 🇺🇸 4 3.8 3.7 0.1 7.5
48 Jennifer Yuh Nelson 🇺🇸 3 3.8 3.7 0.1 7.5
49 Taika Waititi 🇺🇸 3 4.0 3.5 0.5 7.5
50 Jon Favreau 🇺🇸 4 3.9 3.5 0.4 7.4
# Directors scatter: our average grade (x) against number of films (y).
# Points and name labels are coloured by whether the director is above the
# mean grade; the mean itself is the grey vertical line with its value
# printed next to it.
Director_graph <- ggplot(DirectorHead, aes(x = OurGrades, y = Freq)) +
  geom_vline(
    xintercept = mean(DirectorHead$OurGrades, na.rm = TRUE),
    colour = grey
  ) +
  geom_point(
    aes(size = Freq, colour = OurGrades > mean(OurGrades, na.rm = TRUE))
  ) +
  scale_size_continuous(range = c(0.5, 5)) +
  # Upper limit beyond the maximum grade leaves room for the text labels
  xlim(min(DirectorHead$OurGrades), 5.5) +
  geom_smooth(
    method = lm,
    formula = y ~ splines::bs(x, 2),
    se = FALSE,
    colour = pink
  ) +
  geom_text(
    aes(
      label = paste0(Director, ": ", OurGrades),
      colour = OurGrades > mean(OurGrades, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.5,
    check_overlap = TRUE
  ) +
  geom_text(
    aes(
      x = mean(OurGrades, na.rm = TRUE) - 0.1,
      y = max(Freq) + 1,
      label = round(mean(OurGrades, na.rm = TRUE), 2)
    ),
    colour = grey,
    angle = 0,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  scale_color_manual(values = mypal(2)) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    axis.line = element_line(colour = purple),
    panel.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = "Directors by volume and score",
    subtitle = "Sub",
    x = "Grade",
    y = "Frequency"
  )
Director_graph

# The 15 directors with the most films, each with the best, worst and mean
# cumulative grade of their films and the film count, sorted by mean grade.
AListed <- CritiqueFilm %>%
  select(Réalisateur, Notes = `Notes cummulées`) %>%
  group_by(Réalisateur) %>%
  mutate(
    Max = max(Notes),
    Min = min(Notes),
    Mean = round(mean(Notes), 1),
    Count = n()
  ) %>%
  arrange(desc(Count)) %>%
  select(-Notes) %>%
  # The per-film rows now repeat the same director summary; keep one of each
  distinct() %>%
  head(15) %>%
  arrange(desc(Mean))

# Range plot per director: a grey segment from the worst to the best film,
# coloured end points, and a large yellow dot at the mean grade carrying the
# number of films as text.
ggplot(AListed, aes(y = Réalisateur)) +
  geom_segment(
    aes(x = Min, xend = Max, y = Réalisateur, yend = Réalisateur),
    color = "grey", size = .5
  ) +
  geom_point(aes(x = Max, color = "Max"), size = 2) +
  geom_point(aes(x = Min, color = "Min"), size = 2) +
  geom_point(aes(x = Mean), color = yellow, size = 7) +
  geom_text(aes(x = Mean, label = Count), col = purple, family = "AvertaPE-Black") +
  # Reversed so the first row of AListed is drawn at the top
  scale_y_discrete(limits = rev(AListed$Réalisateur)) +
  # Breaks are given explicitly so each label is tied to its key. Unnamed
  # labels are applied positionally to the breaks, which default to
  # alphabetical order ("Max", "Min"); the previous vector
  # c("Minimum", "Maximum", "Range") therefore labelled the keys the wrong
  # way round and carried a third label with no matching key.
  scale_color_manual(
    values = c("Min" = pink, "Max" = blue),
    breaks = c("Min", "Max"),
    labels = c("Minimum", "Maximum")
  ) +
  labs(
    title = "Director ratings with range",
    subtitle = "between worst and best film",
    x = "Grades", y = NULL,
    color = "Grades",
    caption = "Source : Critique Films"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )

# Names of the 15 most frequent genres, counting a film once for its first
# genre and once for its second genre.
ListGenreTop <- rbind(
  as.data.frame(table(CritiqueFilm$`Genre 1`)),
  as.data.frame(table(CritiqueFilm$`Genre 2`))
) %>%
  `colnames<-`(c("Genre", "Freq")) %>%
  group_by(Genre) %>%
  summarise(Freq = sum(Freq)) %>%
  arrange(desc(Freq)) %>%
  head(15) %>%
  ungroup()
# Keep only the levels still present after truncation, as a character vector
ListGenreTop <- levels(droplevels(ListGenreTop$Genre))

library(reshape2)

# Mean cumulative grade per (director, genre) pair, restricted to the top
# genres and to the directors listed in AListed. A film contributes to both
# its first and its second genre.
RéalGenre <- rbind(
  CritiqueFilm %>%
    select(Notes = `Notes cummulées`, Genre = `Genre 1`, Réalisateur),
  CritiqueFilm %>%
    select(Notes = `Notes cummulées`, Genre = `Genre 2`, Réalisateur)
) %>%
  group_by(Réalisateur, Genre) %>%
  summarise(Notes = mean(Notes)) %>%
  filter(
    !is.na(Genre),
    Genre %in% ListGenreTop,
    Réalisateur %in% AListed$Réalisateur
  ) %>%
  mutate(Notes = round(Notes, 1)) %>%
  # summarise() leaves the result grouped by director; drop that grouping so
  # the steps below work on a plain table
  ungroup()

#dcast(RéalGenre, Réalisateur~Genre, value.var='Notes', fill='')
RéalGenre$Genre[RéalGenre$Genre == "SF"] <- "Science Fiction"

# Flag, for every genre, the director(s) holding the best mean grade.
# The join key is passed as `by`: merge() has no `on` argument, so the
# previous `on = "Genre"` was silently ignored and the join only worked
# because "Genre" happened to be the single shared column name.
RéalGenre <- RéalGenre %>%
  group_by(Genre) %>%
  summarise(Max = max(Notes)) %>%
  merge(RéalGenre, by = "Genre") %>%
  mutate(Max = Max == Notes)

RéalGenreMax <- RéalGenre %>% filter(Max)

# Director x genre grid: every cell shows the mean grade in a coloured dot;
# the best director of each genre gets a larger yellow dot and bold text on
# top of the regular one.
ggplot(RéalGenre, aes(x = Genre, y = Réalisateur)) +
  geom_point(aes(x = Genre, y = Réalisateur, color = Notes), size = 6) +
  geom_point(
    data = RéalGenreMax,
    aes(x = Genre, y = Réalisateur),
    color = yellow,
    size = 8
  ) +
  geom_text(
    aes(label = Notes),
    color = white,
    family = "AvertaPE-Regular",
    size = 3
  ) +
  geom_text(
    data = RéalGenreMax,
    aes(x = Genre, y = Réalisateur, label = Notes),
    color = purple,
    family = "AvertaPE-Black",
    size = 3
  ) +
  scale_color_gradient(low = purple, high = blue) +
  labs(
    title = "Directors anchored to their genre",
    subtitle = "Test",
    caption = "Source : Critique Films",
    color = "Grades",
    x = NULL,
    y = NULL
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    axis.line = element_line(colour = purple),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "right"
  )

Genre

# Genre frequency table built from the " / "-separated Genre column, sorted
# by decreasing frequency. Freq is then rescaled to round(1 + Freq / 10).
Genre <- CritiqueFilm$Genre
Genre <- unlist(strsplit(Genre, " / "))
Genre <- as.data.frame(table(Genre))
Genre <- Genre[order(-Genre$Freq), ]
Genre$Freq <- round(1 + Genre$Freq / 10, 0)

# Long table of (genre, our grade): each film appears once for its first
# genre and once for its second genre.
Genre1 <- select(CritiqueFilm, `Genre 1`, `Nos notes`)
Genre2 <- select(CritiqueFilm, `Genre 2`, `Nos notes`)
colnames(Genre2) <- colnames(Genre1) <- c("Genre", "Note")
Genre_merge <- rbind(Genre1, Genre2)
Genre_merge # auto-prints the merged table when the chunk shows results

# Mean grade per genre, rounded to 0.1. vapply() replaces a `1:nrow(Genre)`
# loop, which would iterate over c(1, 0) and fail on an empty genre table.
Genre$Notes <- vapply(
  as.character(Genre$Genre),
  function(genre) {
    round(mean(Genre_merge$Note[Genre_merge$Genre == genre], na.rm = TRUE), 1)
  },
  numeric(1),
  USE.NAMES = FALSE
)


# Word cloud of genres sized by their rescaled frequency, most frequent
# words placed first and all words horizontal.
wordcloud(
  words = Genre$Genre,
  freq = Genre$Freq,
  min.freq = 1,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0,
  colors = rev(mycols3),
  family = "AvertaPE-Black"
)

# Keep the leading rows of Genre, as many as there are genres with a
# rescaled frequency above 1.
Genre <- as.data.frame(head(Genre, sum(as.numeric(Genre$Freq > 1))))

# Genre scatter: mean grade (x) against rescaled frequency (y), coloured by
# position relative to the mean grade, which is drawn as a grey line with its
# value printed next to it.
Genre_graph <- ggplot(Genre, aes(x = Notes, y = Freq)) +
  # Extra 0.3 on the right leaves room for the text labels
  xlim(min(Genre$Notes), max(Genre$Notes) + 0.3) +
  geom_vline(
    xintercept = mean(Genre$Notes, na.rm = TRUE),
    colour = grey
  ) +
  # The logical expression yields a two-level colour: below / above the mean
  geom_point(aes(colour = Notes > mean(Notes, na.rm = TRUE))) +
  geom_smooth(
    method = lm,
    formula = y ~ splines::bs(x, 2),
    se = FALSE,
    colour = pink
  ) +
  geom_text(
    aes(
      label = paste0(Genre, ": ", Notes),
      colour = Notes > mean(Notes, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.5,
    check_overlap = TRUE
  ) +
  geom_text(
    aes(
      x = mean(Notes, na.rm = TRUE) - 0.05,
      y = max(Freq) + 1,
      label = round(mean(Notes, na.rm = TRUE), 2)
    ),
    colour = grey,
    angle = 0,
    vjust = 0,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  scale_color_manual(values = mypal(2)) +
  labs(
    title = "Genre by volume and score",
    subtitle = Sub,
    x = "Grade",
    y = "Frequency"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    axis.line = element_line(colour = purple),
    panel.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "none"
  )
Genre_graph

# Number of films per (Grade, Genre) for grades "A" and "E" only, counting
# each film under both of its genres. Counts are capped at 75 and then
# rescaled with BBmisc::normalize(method = "range").
Genre_radar <- rbind(
  CritiqueFilm %>% select(Grade, Genre = `Genre 1`),
  CritiqueFilm %>% select(Grade, Genre = `Genre 2`)
) %>%
  filter(!is.na(Genre), Grade == "A" | Grade == "E") %>%
  group_by(Grade, Genre) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(
    Count = ifelse(Count > 75, 75, Count),
    Count = BBmisc::normalize(Count, method = "range")
  )

# The ten genres with the largest combined (A + E) normalised count
Top_Genre <- (Genre_radar %>%
  group_by(Genre) %>%
  summarise(Sum = sum(Count)) %>%
  arrange(desc(Sum)) %>%
  head(10))$Genre

Genre_radar <- Genre_radar %>%
  filter(Genre %in% Top_Genre)

# Wide table: one row per grade, one column per genre
Skill_radar <- xtabs(formula = Count ~ Grade + Genre, data = Genre_radar) %>%
  as.data.frame.matrix()

Skill_radar <- Skill_radar %>%
  mutate(Grade = row.names(Skill_radar)) %>%
  select(Grade, everything()) %>%
  `rownames<-`(seq_len(nrow(Skill_radar))) %>%
  # all_of() states that Top_Genre is an external vector of column names.
  # Passing it bare is deprecated in tidyselect and becomes ambiguous if a
  # column happens to be called "Top_Genre".
  select(Grade, all_of(as.character(Top_Genre)))

# Radar chart with one polygon per grade group over the selected genres.
Skill_radar_graph <- ggradar(
  Skill_radar,
  grid.label.size = 4,   # size of the grid annotations (0%, 50%, etc.)
  axis.label.size = 3.2,
  group.point.size = 3,  # size of the points
  group.colours = c(blue, pink)
) +
  labs(
    title = paste("Genre comparison between A-Listed and E-Listed"),
    caption = "Source : Critique Films"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    legend.position = c(-0.1, 0.2),
    legend.justification = "left",
    legend.text = element_text(size = 10),
    legend.key = element_rect(fill = NA, color = NA),
    legend.background = element_blank()
  )

Skill_radar_graph

Actors

# Actors sorted by decreasing number of films
Acteur <- Acteur[order(-Acteur$Freq), ]

# Actors scatter: average grade (x) against number of films (y). Point size
# follows the film count; colour splits actors below / above the mean grade,
# which is drawn as a grey vertical line with its value printed next to it.
Acteur_graph <- ggplot(Acteur, aes(x = Notes, y = Freq)) +
  geom_vline(
    xintercept = mean(Acteur$Notes, na.rm = TRUE),
    colour = grey
  ) +
  geom_text(
    aes(
      x = mean(Notes, na.rm = TRUE) - 0.1,
      y = max(Freq) + 1,
      label = round(mean(Notes, na.rm = TRUE), 2)
    ),
    colour = grey,
    angle = 0,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  geom_smooth(
    method = lm,
    formula = y ~ splines::bs(x, 3),
    se = FALSE,
    colour = pink
  ) +
  geom_point(aes(size = Freq, colour = Notes > mean(Notes, na.rm = TRUE))) +
  scale_size_continuous(range = c(0.5, 5)) +
  # Upper limit of 6 leaves room on the right for the text labels
  xlim(min(Acteur$Notes), 6) +
  geom_text(
    aes(
      label = paste(Acteur, Notes),
      colour = Notes > mean(Notes, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.5,
    family = "AvertaPE-Regular",
    size = 9 / .pt,
    check_overlap = TRUE
  ) +
  scale_color_manual(values = mycols2) +
  labs(
    title = "Actor with the best average according to their frequency",
    subtitle = "",
    x = "Grade",
    y = "Frequency"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    axis.line = element_line(colour = purple),
    panel.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "none"
  )
Acteur_graph

# Rank actors in their current row order and align the row names with it.
# seq_len() is safe on an empty table, unlike `1:length(...)`.
Acteur$Rank <- seq_len(nrow(Acteur))
rownames(Acteur) <- Acteur$Rank

# Flags of every distinct, non-missing country found for each actor in
# Acteur_merge, concatenated into one string. This replaces pasting eleven
# fixed slots and deleting the text "NA" afterwards, which capped the number
# of flags and relied on no flag value containing the letters "NA".
Acteur$Countries <- vapply(
  as.character(Acteur$Acteur),
  function(actor) {
    flags <- unique(
      Acteur_merge$`Emoji Pays`[which(Acteur_merge$Acteur == actor)]
    )
    paste(flags[!is.na(flags)], collapse = "")
  },
  character(1),
  USE.NAMES = FALSE
)

Acteur <- select(Acteur, Rank, Acteur, Countries, Freq, Notes, Presse, Total)

# Top 50 actors as a scrollable HTML table. The Total column is turned into
# colour tiles over all actors before the table is cut to 50 rows.
Acteur %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(50) %>%
  kable(escape = FALSE, align = c("l", "l", rep("c", 5))) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(7, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
Rank Acteur Countries Freq Notes Presse Total
1 Hugh Jackman 🇺🇸 20 3.9 3.6 7.5
2 Brad Pitt 🇺🇸🇫🇷 15 4.0 3.5 7.5
3 Ryan Reynolds 🇺🇸 15 3.6 3.5 7.1
4 Robert Downey Jr.  🇺🇸 13 4.3 3.9 8.2
5 Johnny Depp 🇺🇸 13 4.2 3.7 7.9
6 Chris Evans 🇺🇸🇰🇷 13 4.1 3.6 7.7
7 Robert De Niro 🇺🇸 13 3.7 3.6 7.3
8 Leonardo DiCaprio 🇺🇸🇬🇧 12 4.3 3.8 8.1
9 Chris Hemsworth 🇺🇸 11 4.3 3.7 8.0
10 Emma Watson 🇺🇸 11 4.1 3.9 8.0
11 Tom Hanks 🇺🇸 11 4.1 3.9 8.0
12 Daniel Radcliffe 🇳🇿🇺🇸 11 4.1 3.8 7.9
13 Christian Bale 🇺🇸🇯🇵 11 4.0 3.8 7.8
14 Jean Dujardin 🇫🇷🇺🇸 11 3.5 3.1 6.6
15 Seth Rogen 🇺🇸🇬🇧 11 3.3 3.0 6.3
16 Emma Stone 🇺🇸 10 3.7 3.7 7.4
17 Ben Stiller 🇺🇸 10 3.8 3.3 7.1
18 Jake Gyllenhaal 🇺🇸🇨🇦 10 3.6 3.4 7.0
19 Rupert Grint 🇺🇸 9 4.3 3.9 8.2
20 Daniel Craig 🇺🇸🇬🇧 9 4.4 3.7 8.1
21 Chris Pratt 🇺🇸 9 4.1 3.6 7.7
22 Scarlett Johansson 🇺🇸 9 3.7 3.7 7.4
23 Dwayne Johnson 🇺🇸 9 3.2 3.4 6.6
24 Zac Efron 🇺🇸 9 3.3 3.1 6.4
25 Kevin Hart 🇺🇸 9 3.1 3.2 6.3
26 Michaël Youn 🇫🇷 9 2.7 2.1 4.8
27 Ramzy Bedia 🇫🇷 9 2.1 1.9 4.0
28 Orlando Bloom 🇺🇸 8 4.7 4.1 8.8
29 Natalie Portman 🇺🇸🇫🇷 8 4.4 3.8 8.2
30 Angelina Jolie 🇺🇸 8 4.1 3.8 7.9
31 Jennifer Lawrence 🇺🇸 8 4.0 3.7 7.7
32 Robin Williams 🇺🇸 8 4.1 3.6 7.7
33 Ralph Fiennes 🇺🇸🇬🇧 8 4.2 3.4 7.6
34 Bradley Cooper 🇺🇸 8 3.7 3.6 7.3
35 Marion Cotillard 🇫🇷🇺🇸 8 3.8 3.5 7.3
36 Joseph Gordon-Levitt 🇺🇸 8 3.7 3.4 7.1
37 Amy Adams 🇺🇸 8 3.6 3.4 7.0
38 Steve Carell 🇺🇸 8 3.5 3.5 7.0
39 Will Smith 🇺🇸 8 3.6 3.3 6.9
40 Ewan McGregor 🇺🇸 7 4.5 3.8 8.3
41 Aaron Taylor-Johnson 🇺🇸🇬🇧 7 4.3 3.7 8.0
42 Chris Pine 🇺🇸🇬🇧 7 4.1 3.8 7.9
43 Michael Fassbender 🇺🇸 7 4.2 3.5 7.7
44 Christoph Waltz 🇺🇸🇬🇧 7 3.8 3.7 7.5
45 Jack Black 🇺🇸 7 3.9 3.6 7.5
46 Anne Hathaway 🇺🇸 7 3.7 3.6 7.3
47 Robert Pattinson 🇺🇸 7 3.9 3.3 7.2
48 John Leguizamo 🇺🇸 7 3.3 3.6 6.9
49 Kristen Wiig 🇺🇸 7 3.6 3.3 6.9
50 Owen Wilson 🇺🇸 7 3.6 3.3 6.9
# NOTE(review): absolute, user-specific path; the chunk only runs on a
# machine that has this file at this location.
ActeurDB <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx", sheet = "Acteurs")

# (film title, actor) pairs: the "Acteurs" sheet rows flagged "-" in
# `Présent dans "Film à voir"`, plus the three lead-actor columns of
# CritiqueFilm. Rows with DA equal to TRUE are excluded in both sources.
films_not_da <- CritiqueFilm %>% filter(DA != TRUE)

Actor2 <- ActeurDB %>%
  select(`Titre du film`, Acteur, `Présent dans "Film à voir"`, DA) %>%
  filter(`Présent dans "Film à voir"` == "-", DA != TRUE) %>%
  select(Titre = `Titre du film`, Acteur) %>%
  rbind(
    films_not_da %>% select(Titre = `Titre du film`, Acteur = `Acteur 1`),
    films_not_da %>% select(Titre = `Titre du film`, Acteur = `Acteur 2`),
    films_not_da %>% select(Titre = `Titre du film`, Acteur = `Acteur 3`)
  ) %>%
  unique()

# The 50 actors appearing in the most films. The count column is named
# explicitly instead of relying on the "." name produced by piping into
# table().
Actor_list <- as.data.frame(table(Acteur = Actor2$Acteur)) %>%
  arrange(desc(Freq)) %>%
  head(50)
Actor_list <- as.character(Actor_list$Acteur)

# For one actor, count how many films they share with every other actor.
# Missing actor names and missing titles are ignored so that they cannot
# link unrelated people through an NA match.
co_star_counts <- function(actor_name) {
  titles <- Actor2$Titre[which(Actor2$Acteur == actor_name)]
  titles <- titles[!is.na(titles)]
  co_stars <- Actor2$Acteur[Actor2$Titre %in% titles]
  as.data.frame(table(co_stars)) %>%
    `colnames<-`(c("to", "value")) %>%
    mutate(from = actor_name, to = as.character(to)) %>%
    filter(to != from)
}

# Edge list (from, to, value). The pieces are bound once instead of growing
# a data frame with rbind() inside a loop, and the typed empty template
# replaces the former all-NA placeholder row while still fixing the column
# order when Actor_list is empty.
ActeurMain <- bind_rows(
  data.frame(
    from = character(),
    to = character(),
    value = integer(),
    stringsAsFactors = FALSE
  ),
  lapply(Actor_list, co_star_counts)
)

# Keep only edges between two actors of the top list
connect <- ActeurMain %>% filter(!is.na(value), to %in% Actor_list)

# Number of connections per person: how often each name appears at either
# end of an edge. tibble() + count() replaces as_tibble() on a bare vector,
# which is superseded in tibble.
coauth <- tibble(
  name = c(as.character(connect$from), as.character(connect$to))
) %>%
  count(name)
#dim(coauth)

# Create a graph object with igraph
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)

# Find communities. cluster_walktrap() is the current name of the
# deprecated walktrap.community().
com <- cluster_walktrap(mygraph)
#max(com$membership)

# Reorder the vertices by community so that members of one community sit
# next to each other, and freeze that order in the factor levels
coauth <- coauth %>%
  mutate(grp = com$membership) %>%
  arrange(grp) %>%
  mutate(name = factor(name, name))

# One colour per community, interpolated across the report palette
colfunc <- colorRampPalette(c(purple, pink, blue, yellow))

scale_col <- colfunc(max(coauth$grp))

# Keep only these people in edges
connect <- connect %>%
  filter(from %in% coauth$name) %>%
  filter(to %in% coauth$name)

# Rebuild the graph so that it uses the reordered vertex table
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)

# Arc diagram: actors laid out on a line, shared films drawn as arcs; node
# size follows the connection count and node colour the community.
ggraph(mygraph, layout = "linear") +
  geom_edge_arc(edge_colour = grey, fold = TRUE) +
  geom_node_point(aes(size = n, color = as.factor(grp), fill = grp)) +
  scale_color_manual(values = scale_col) +
  scale_size_continuous(range = c(0.5, 5)) +
  geom_node_text(
    aes(label = name),
    angle = 65,
    hjust = 1,
    nudge_y = -0.5,
    size = 3
  ) +
  # Extra space so the rotated name labels are not clipped
  expand_limits(x = c(-1.2, 1.2), y = c(-5, 0)) +
  theme(
    text = element_text(size = 12),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )

International

Même si Hollywood domine

# Emit an inline stylesheet giving leaflet containers a white background.
cat(paste(
  c(
    "",
    "<style>",
    ".leaflet-container {",
    "   background: #FFF;",
    "}",
    "</style>",
    ""
  ),
  collapse = "\n"
))
# World map enriched, per sovereign state, with the number of films seen and
# the title and cumulative grade of its best-ranked film (smallest
# `Nombre Classement`).
map <- ne_countries()
write.csv(map$sovereignt, "map_Countries.csv")

#map <- map[map$sovereignt %in% (CritiqueFilm$`Pays d'origine` %>% unique()),]

map_countries <- as.character(map$sovereignt)
film_origin <- CritiqueFilm$`Pays d'origine`
film_rank <- CritiqueFilm$`Nombre Classement`

# Number of films whose country of origin matches the map entry
map$freq <- vapply(
  map_countries,
  function(country) sum(film_origin == country, na.rm = TRUE),
  integer(1),
  USE.NAMES = FALSE
)

# Row index in CritiqueFilm of the best-ranked film of each country, or NA
# when the country has no ranked film. The index is computed once and reused
# for both columns. It replaces two loops, the second an exact repeat of part
# of the first, that evaluated the same lookup twice per country, called
# min() on empty sets (warning, Inf) and assigned several values to one cell
# when ranks tied.
best_film_index <- vapply(
  map_countries,
  function(country) {
    films <- which(film_origin == country)
    best <- films[which.min(film_rank[films])]
    if (length(best) == 0) NA_integer_ else best
  },
  integer(1),
  USE.NAMES = FALSE
)

map$best_movie <- CritiqueFilm$`Titre du film`[best_film_index]
# Stored as text, as before, since it is only pasted into the map labels
map$best_movie_rate <- as.character(
  CritiqueFilm$`Notes cummulées`[best_film_index]
)


# Countries without any film are treated as missing from here on
map$freq[map$freq == 0] <- NA

# Bucket the film count into six classes: 0 = none, 1 = exactly one,
# 2 = fewer than 5, 3 = fewer than 100, 4 = fewer than 500, 5 = the rest
map$Grade <- case_when(
  is.na(map$freq) ~ 0,
  map$freq == 1 ~ 1,
  map$freq < 5 ~ 2,
  map$freq < 100 ~ 3,
  map$freq < 500 ~ 4,
  TRUE ~ 5
)

# Text version of the same classes, looked up from the class number
map$Label <- c("0", "1", "<5", "<100", "<500", ">500")[map$Grade + 1]

# Binned colour palette with one bin per unit step of Grade
pal <- colorBin(
  palette = mycols5,
  domain = map$Grade,
  bins = seq(0, max(map$Grade, na.rm = TRUE), by = 1)
)

# HTML hover label for each country
map$labels <- lapply(
  paste0(
    "<strong> Country: </strong> ", map$sovereignt, "<br/> ",
    "<strong> Number of movies seen : </strong> ", round(map$freq, 0), "<br/> ",
    "<strong> Best movie for this country : </strong> ", map$best_movie, " : ", map$best_movie_rate, "/10", "<br/> "
  ),
  htmltools::HTML
)

# Interactive choropleth: countries filled by film-count class, with an HTML
# hover label and a pink outline on the highlighted country.
LeafMap <- leaflet(map) %>%
  setMapWidgetStyle(list(background = "white")) %>%
  setView(lng = 0, lat = 30, zoom = 1.3) %>%
  addPolygons(
    fillColor = ~ pal(Grade),
    fillOpacity = 1,
    color = purple,
    opacity = 1,
    weight = 1,
    label = ~labels,
    highlightOptions = highlightOptions(
      color = pink,
      bringToFront = TRUE,
      fill = 1,
      fillOpacity = 1
    )
  ) %>%
  addLegend(
    pal = pal,
    values = ~Grade, #c("0","1","<5","<100","<500",">500"),
    opacity = 1,
    title = "Freq"
  )
LeafMap
# Films per country of origin, without the "Multi" bucket
data <- CritiqueFilm %>%
  group_by(group = `Pays d'origine`) %>%
  summarise(value = n()) %>%
  ungroup() %>%
  filter(group != "Multi")

# Share of each country among the remaining films, as a percentage string
data$Perc <- paste0(round(100 * data$value / sum(data$value), 1), "%")

# Align two country names with the `name` column of the map before joining
data$group[data$group == "South Korea"] <- "Korea"
data$group[data$group == "United States of America"] <- "United States"

# Attach the two-letter code, drop countries without one, and leave out the
# United States and France
data <- map %>%
  as.data.frame() %>%
  select(group = name, code = wb_a2) %>%
  merge(data, by = "group", all.y = TRUE) %>%
  filter(!is.na(code)) %>%
  mutate(
    group = toupper(group),
    label = paste0(group, ": ", value),
    code = tolower(code)
  ) %>%
  filter(
    group != "UNITED STATES",
    group != "FRANCE"
  )

# Circle-packing layout with circle areas driven by the film counts
packing <- circleProgressiveLayout(data$value, sizetype = "area")
data <- cbind(data, packing)
dat.gg <- circleLayoutVertices(packing, npoints = 50)

# Only the seven largest countries get their percentage printed
dataPerc <- head(arrange(data, desc(value)), 7)

# Packed bubble chart: one flag per country at its packing position, sized
# by the film count. The polygons are fully transparent.
ggplot() +
  geom_point(data = data, aes(x, y, size = value, col = group), alpha = 1) +
  geom_flag(data = data, aes(x, y, size = value, country = code)) +
  geom_text(
    data = dataPerc,
    aes(x, y, label = Perc),
    col = purple,
    family = "AvertaPE-Black",
    size = 4
  ) +
  geom_polygon(
    data = dat.gg,
    aes(x, y, group = id, fill = as.factor(id)),
    alpha = 0
  ) + #add col to see the size
  scale_size(range = c(5, 40)) +
  labs(
    title = "Title",
    subtitle = "Test",
    caption = "Source: Critique Films",
    col = "Saga",
    y = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    plot.margin = unit(c(0.2, 10, 0.2, 0.2), "cm"),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    legend.background = element_blank(),
    legend.position = "none"
  ) +
  coord_equal()

Academy Awards

Whenever I talk about the Oscars with my movie-loving friends, I’ve spotted two things that come up regularly:

First, the Oscars are only about American films and are not representative of world cinema. Second, the quality of the films presented at the Oscars is getting lower and lower.

# Oscar nominations from ceremony 70 onwards, with title-cased film titles.
# Only the first four columns of the sheet are kept.
OscarDB <- read_excel(
  "/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",
  sheet = "Oscars",
  skip = 3
) %>%
  filter(!is.na(Catégorie), Cérémonie >= 70) %>%
  select(1:4) %>%
  mutate(`Titre du film` = str_to_title(`Titre du film`))

# Distinct nominated titles, counted before the join with NamesFilm
CountOscarMovies <- nrow(unique(select(OscarDB, "Titre du film")))

# Title-case the film list the same way so that the titles can be matched
NamesFilm$`Titre du film` <- str_to_title(NamesFilm$`Titre du film`)
OscarDB <- merge(OscarDB, NamesFilm, by = "Titre du film")

# Distinct matched films whose country of origin is not the USA
CountOscarMoviesHorsUS <- OscarDB %>%
  select(`Titre du film`, `Pays d'origine`) %>%
  unique() %>%
  filter(`Pays d'origine` != "United States of America") %>%
  nrow()

# Share (in %) of non-US nominations once the international-film category
# is left out
OscarHorsUSA <- round(
  100 *
    nrow(filter(
      OscarDB,
      Catégorie != "Meilleur film international",
      `Pays d'origine` != "United States of America"
    )) /
    nrow(filter(OscarDB, Catégorie != "Meilleur film international"))
)

OscarUSAMovies <- nrow(filter(OscarDB, `Pays d'origine` == "United States of America"))
OscarMoviesPerc <- round(100 * (CountOscarMovies - CountOscarMoviesHorsUS) / CountOscarMovies)
# Current calendar year minus 1998
Year <- as.numeric(format(now(), "%Y")) - 1998
OscarMean <- round(mean(OscarDB$`Notes cummulées`, na.rm = TRUE), 1)

Top <- 10

# Nominations per country (USA and "Multi" excluded), limited to the `Top`
# most nominated countries and numbered from the least to the most nominated.
Oscar_Country <- OscarDB %>%
  group_by(`Pays d'origine`, `Emoji Pays`) %>%
  summarise(Freq = n()) %>%
  arrange(desc(Freq)) %>%
  filter(
    `Pays d'origine` != "Multi",
    `Pays d'origine` != "United States of America"
  ) %>%
  ungroup() %>%
  head(Top) %>%
  arrange(Freq) %>%
  mutate(ID = cumsum(rep(1, n())))

# For each of the five most nominated countries, the nominated film with the
# highest cumulative grade, described as "Title (Year) directed by Director".
OscarDBTop <- OscarDB %>%
  filter(
    `Pays d'origine` %in%
      head(arrange(Oscar_Country, desc(Freq)), 5)$`Pays d'origine`
  ) %>%
  arrange(desc(`Notes cummulées`)) %>%
  group_by(`Pays d'origine`) %>%
  summarise(
    Year = first(Année),
    Title = first(`English Title`),
    Director = first(Réalisateur),
    Country = first(`Pays d'origine`)
  ) %>%
  ungroup() %>%
  mutate(label = paste0(Title, " (", Year, ") directed by ", Director))

# Bar thickness and the (negative) lower x limit used by the polar chart
sizeOsc <- 4
maxOsc <- -2 * max(Oscar_Country$Freq) - 2

# Add the two-letter country code from the map and the text placement
# settings: France is given its own angle and justification, every other
# country shares the same ones.
Oscar_Country <- map %>%
  as.data.frame() %>%
  select(`Pays d'origine` = name, Code = wb_a2) %>%
  merge(Oscar_Country, by = "Pays d'origine") %>%
  mutate(
    `Pays d'origine` = toupper(`Pays d'origine`),
    angle = case_when(
      `Pays d'origine` == "FRANCE" ~ 0,
      TRUE ~ 180
    ),
    hjust = case_when(
      `Pays d'origine` == "FRANCE" ~ 1.2,
      TRUE ~ -0.2
    ),
    label = paste0(`Pays d'origine`, ": ", Freq)
  ) %>%
  arrange(desc(Freq))

colfuncOsc <- colorRampPalette(c(purple, pink, yellow))

# Circular bar chart of Oscar nominations per country. Each country sits on
# its own ring (y = ID); its bar is a segment running from x = 0 to
# x = -Freq, bent into an arc by coord_polar().
Oscar_Country_graph <- ggplot(Oscar_Country, aes(y = ID))+
  # Thin grey guide arc on every ring, spanning half of the x range
  geom_segment(aes(x = 0, xend = maxOsc/2, yend = ID), size = .2, col = grey)+
  # The bars themselves, one colour per distinct nomination count
  geom_segment(aes(x = 0, xend = -Freq, yend = ID, group=Freq, col=factor(Freq)), size = sizeOsc)+
  # "COUNTRY: n" text following each bar; justification and angle are taken
  # row by row from the hjust / angle columns of Oscar_Country
  geom_textpath(aes(x = -Freq, y = ID, label = label),
                hjust=Oscar_Country$hjust, size=sizeOsc-1, col=white, family="AvertaPE-Regular",upright=F,angle=Oscar_Country$angle)+
  # Radial baseline at x = 0 from which all bars start
  geom_segment(aes(x = 0, xend = 0, y = 0, yend = Top+1), col=purple)+
  xlim(c(maxOsc,0)) +
  # The y range starts below the first ring, which leaves an empty centre
  ylim(c(-2,Top+1)) +
  scale_color_manual(values=colfuncOsc(Oscar_Country$Freq %>% unique() %>% length()))+
  # Country flag at the end of each bar
  geom_flag(aes(x = -Freq,country=Code %>% tolower()),size = sizeOsc-0.5)+
  coord_polar()+
  # labs(title="Evolution of the scores of the films presented at the Oscars",
  #      subtitle=paste0("from ",YearMin_graph," to ",YearMax_graph))+
  theme(text=element_text(size=12, family="AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        # NOTE(review): the negative right margin presumably crops the unused
        # part of the circle; confirm against the rendered figure
        plot.margin = unit(c(0,-12,0,0), "cm"),
        legend.background = element_blank(),
        axis.line = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))

#ggsave(file="CountryOscar.png", plot=Oscar_Country_graph, width=9, height=12)

#OscarDB$`Notes cummulées` %>% mean(na.rm=T)

# Number of nominations for France (country names are upper-cased in
# Oscar_Country)
OscFr <- Oscar_Country$Freq[Oscar_Country$`Pays d'origine`=="FRANCE"]

Oscar_Country_graph

Across the 3 categories of films (Best Foreign Language Film, Best Animated Feature, Best Picture), 60% of the films are American; if we remove the “Best Foreign Language Film” category, only 19% are not American. So YES, they dominate the Oscars, but not the market…

Here are the best films presented by the countries in this top:

  • Denmark: Another Round (2020) directed by Thomas Vinterberg

  • France: The Pianist (2002) directed by Roman Polanski

  • Germany: Never Look Away (2018) directed by Florian Henckel von Donnersmarck

  • Japan: The Tale of the Princess Kaguya (2014) directed by Isao Takahata

  • United Kingdom: 1917 (2020) directed by Sam Mendes

In 25 years, 15 French films have been nominated for Oscars, making France the second most represented country after the USA (229 films presented).

In this top we find our British friends, a market that is a cousin of American cinema, which once again reflects the desire to have a closed circle. Japan is also here thanks to the animation category which gives it a lot of visibility.

# Keep only the films that have a cumulative grade.
OscarDB <- filter(OscarDB, !is.na(`Notes cummulées`))

# Attach per-ceremony statistics (mean, count, best and worst grade) to every
# film row, then drop the most recent ceremony.
OscarDB <- OscarDB %>%
  group_by(Cérémonie) %>%
  summarise(
    avg = mean(`Notes cummulées`, na.rm = TRUE),
    Freq = n(),
    max = max(`Notes cummulées`, na.rm = TRUE),
    min = min(`Notes cummulées`, na.rm = TRUE)
  ) %>%
  merge(OscarDB, by = "Cérémonie") %>%
  filter(Cérémonie < max(Cérémonie))

# One row per ceremony: the same statistics plus the title and grade of the
# film that won "Meilleur film".
OscarDB2 <- OscarDB %>%
  group_by(Cérémonie) %>%
  summarise(
    avg = mean(`Notes cummulées`, na.rm = TRUE),
    Freq = n(),
    max = max(`Notes cummulées`, na.rm = TRUE),
    min = min(`Notes cummulées`, na.rm = TRUE)
  ) %>%
  merge(
    OscarDB %>%
      filter(Résultat == "Oscar", Catégorie == "Meilleur film") %>%
      select(Cérémonie, "English Title", `Notes cummulées`),
    by = "Cérémonie"
  )

# Shorten one very long title so that its label fits on the chart.
OscarDB2 <- OscarDB2 %>%
  mutate(
    `English Title` = replace(
      `English Title`,
      `English Title` %in% "The Lord of the Rings: The Return of the King",
      "Lotr : The Return of the King"
    )
  )

sizeOsc <- 5

# Restrict the film-level data to the ceremonies that have a best-picture row.
OscarDB <- OscarDB[OscarDB$Cérémonie %in% OscarDB2$Cérémonie, ]

# Per ceremony: grey range bar from worst to best grade, pink/purple end
# points, the winner's grade in yellow with its title, and one smoothed trend
# per Résultat value computed on the film-level data.
Oscar_Year_graph <- ggplot(data = OscarDB2, aes(x = Cérémonie)) +
  geom_segment(
    aes(y = min, yend = max, x = Cérémonie, xend = Cérémonie),
    size = sizeOsc / 6, col = grey
  ) +
  coord_flip() +
  geom_point(aes(y = min), col = pink, size = sizeOsc) +
  geom_text(
    aes(y = min, label = min),
    size = sizeOsc / 2, col = white, family = "AvertaPE-Black"
  ) +
  geom_point(aes(y = max), col = purple, size = sizeOsc) +
  geom_text(
    aes(y = max, label = max),
    size = sizeOsc / 2, col = white, family = "AvertaPE-Black"
  ) +
  geom_label(
    aes(label = `English Title`, y = `Notes cummulées` - sizeOsc / 50),
    hjust = sizeOsc / 5, family = "AvertaPE-Regular", size = sizeOsc / 2,
    fill = white, col = purple, label.size = NA
  ) +
  geom_smooth(
    data = OscarDB,
    aes(y = `Notes cummulées`, group = Résultat, col = Résultat),
    method = lm, formula = y ~ splines::bs(x, 2), se = FALSE,
    size = sizeOsc / 10
  ) +
  geom_point(aes(y = `Notes cummulées`), col = yellow, size = sizeOsc + 2) +
  geom_text(
    aes(y = `Notes cummulées`, label = `Notes cummulées`),
    size = (sizeOsc + 2) / 2, col = purple, family = "AvertaPE-Black"
  ) +
  scale_color_manual(values = c(blue, yellow)) +
  #ylim(c(4,10))+
  labs(
    title = "Evolution of the scores of the films presented at the Oscars",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph)
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

Oscar_Year_graph

Despite this fine French performance, international cinema is heavily under-represented, and the introduction of the foreign language film category has helped give more visibility to these non-Hollywood markets. Among the great absentees is India, whose Bollywood industry is likely to produce good films that unfortunately cross our borders far too rarely. Spain is also absent despite its quality European productions and a market with the wind in its sails.

As for quality, the nominated films have kept about the same average grade for 25 years (~8/10), but the winners have a lower average: an Oscar is therefore not a guarantee of public appreciation, only of recognition by an established jury.

The films presented are obviously linked to political and economic issues that go far beyond the consumers, so I would advise being wary: what is presented is not necessarily representative. However, these films are good overall and will mark the year of their nominations!

Sagas

# Year window used for the saga count table and heat map.
DecadeMin_graph <- 1930
DecadeMax_graph <- 2020

# Films whose year falls inside the window, reduced to saga / year / grade.
Decade_Grade_Saga <- select(CritiqueFilm, Saga, Année, `Nos notes`)
Decade_Grade_Saga <- Decade_Grade_Saga[
  Decade_Grade_Saga$Année >= DecadeMin_graph &
    Decade_Grade_Saga$Année <= DecadeMax_graph,
]

# Number of films per saga, most frequent first. The entry literally named
# "Saga" is dropped.
Decade_Grade_Saga_table <- as.data.frame(table(Decade_Grade_Saga$Saga))
Decade_Grade_Saga_table <-
  Decade_Grade_Saga_table[order(-Decade_Grade_Saga_table$Freq), ]
Decade_Grade_Saga_table <-
  Decade_Grade_Saga_table[Decade_Grade_Saga_table$Var1 != "Saga", ]
colnames(Decade_Grade_Saga_table)[1] <- "Saga"

# Per-saga statistics. As before, they are computed over ALL rows of
# CritiqueFilm that belong to the saga (not only the rows inside the year
# window). vapply() replaces the former `for (s in 1:length(...))` loop, which
# iterated over c(1, 0) on an empty table and grew the columns element by
# element.
saga_names <- as.character(Decade_Grade_Saga_table$Saga)
saga_stat <- function(values, fun) {
  vapply(
    saga_names,
    function(saga) fun(values[which(CritiqueFilm$Saga == saga)], na.rm = TRUE),
    numeric(1),
    USE.NAMES = FALSE
  )
}

Decade_Grade_Saga_table$Grade <-
  round(saga_stat(CritiqueFilm$`Nos notes`, mean), 1)
Decade_Grade_Saga_table$Presse <-
  round(saga_stat(CritiqueFilm$`Note Presse`, mean), 1)
Decade_Grade_Saga_table$Diff <-
  round(saga_stat(CritiqueFilm$Différence, mean), 1)
# Durée is summed and divided by 60.
Decade_Grade_Saga_table$Duration <-
  round(saga_stat(CritiqueFilm$Durée, sum) / 60, 1)

# Reset the row names to 1..n (also valid when the table is empty).
rownames(Decade_Grade_Saga_table) <- NULL

knitr::kable(Decade_Grade_Saga_table) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  scroll_box(width = "100%", height = "370px")
Saga Freq Grade Presse Diff Duration
Disney 78 3.8 3.7 0.4 137.8
Marvel 50 3.9 3.5 0.5 126.0
DreamWorks 37 3.6 3.5 0.5 61.5
DC 18 3.8 3.5 0.5 51.5
Ghibli 18 3.7 3.9 0.4 32.0
Star Wars 12 4.5 3.8 0.7 26.5
Harry Potter 10 4.3 3.8 0.6 28.0
American Pie 8 2.8 2.5 0.4 12.9
James Bond 6 4.3 3.6 0.8 15.8
Tarantino 6 4.5 4.2 0.3 14.1
Jurassic Park 5 4.3 3.3 1.0 12.4
L’Âge de glace 5 3.2 3.4 0.3 8.8
Pirates des Caraïbes 5 4.6 3.8 0.8 12.1
Pokémon 5 3.3 3.0 0.5 8.4
Saw 5 3.1 2.8 0.5 8.2
Scary Movie 5 2.7 2.2 0.5 7.1
Transformers 5 3.2 3.0 0.5 12.1
Twilight 5 3.6 3.1 0.5 10.2
Astérix 4 3.8 3.0 0.8 8.4
Hunger Games 4 4.2 3.6 0.5 9.1
La Planète des Singes 4 4.7 4.0 0.7 8.1
La Terre du Milieu 4 4.9 4.3 0.6 14.0
Moi, Moche et Méchant 4 3.9 3.7 0.2 8.1
American Nightmare 3 3.4 3.1 0.4 4.9
Ducobu 3 2.6 2.0 0.6 4.7
Fast & Furious 3 2.4 2.8 0.5 5.8
Hellboy 3 3.3 2.8 0.6 6.0
Hôtel Transylvanie 3 3.6 3.5 0.2 6.2
John Wick 3 4.2 3.6 0.5 5.9
Jumanji 3 3.9 3.5 0.3 5.8
Klapisch 3 2.9 3.7 0.8 6.1
La Nuit au Musée 3 3.1 2.8 0.6 6.5
Les Schtroumpfs 3 3.3 2.9 0.4 5.0
Lucky Luke 3 2.8 1.6 1.2 4.6
Mon beau-père et moi 3 3.2 3.1 0.2 5.3
Narnia 3 3.5 3.2 0.4 6.8
Retour vers le futur 3 4.6 4.3 0.4 5.7
Star Trek 3 4.7 3.8 0.9 6.4
Very Bad Trip 3 4.0 3.4 0.6 5.0
300 2 4.0 3.6 0.4 3.6
Babysitting 2 3.9 3.8 0.1 3.0
Borat 2 2.8 2.7 0.1 2.9
Comme des bêtes 2 3.0 3.6 0.6 2.9
Comment tuer son boss? 2 3.2 2.9 0.4 3.4
Destination finale 2 3.0 2.7 0.3 2.9
Dr. Seuss 2 3.4 3.3 0.7 3.2
Happy Feet 2 4.3 3.2 1.2 3.5
Jump Street 2 3.5 3.5 0.0 3.7
Kick-Ass 2 4.7 3.9 0.9 3.7
Kingsman 2 4.8 4.0 0.8 6.7
La Tour Montparnasse 2 2.5 1.9 0.6 3.0
Le Choc des Titans 2 4.2 2.4 1.8 3.4
Les Nouvelles Aventures 2 1.3 1.5 0.2 3.4
Les Visiteurs 2 2.8 2.6 0.6 3.6
Matrix 2 4.2 4.0 0.3 4.6
Nos pires voisins 2 3.4 2.6 0.8 3.1
OSS 117 2 3.8 3.3 0.5 5.2
Papa ou Maman 2 3.6 3.5 0.2 3.0
Percy Jackson 2 3.3 2.7 0.6 3.8
Red 2 3.9 3.5 0.4 3.8
Rio 2 3.9 3.8 0.4 3.2
Sherlock Holmes 2 4.8 4.0 0.8 4.3
Sister Act 2 3.2 3.5 0.2 3.5
Ted 2 3.8 3.3 0.5 3.7
Zombieland 2 3.9 3.9 0.2 3.1
À couteaux tirés 1 4.4 4.0 0.4 4.5
Agatha Christie 1 4.2 3.2 1.0 4.1
Alibi 1 3.6 3.7 0.1 1.5
Alien 1 4.0 3.3 0.7 2.1
Asimov 1 3.7 3.7 0.0 1.7
Assassin’s Creed 1 3.6 2.9 0.7 1.9
Avatar 1 5.0 4.3 0.7 5.9
Balle Perdue 1 3.3 3.1 0.2 3.2
Blade Runner 1 2.8 4.2 1.4 2.0
Breaking Bad 1 4.2 3.7 0.5 2.0
Dernier train pour Busan 1 3.9 4.1 0.2 2.0
Dragon Quest 1 4.1 3.8 0.3 1.7
Enola Holmes 1 3.1 3.6 0.5 4.2
Gatsby 1 5.0 4.0 1.0 2.4
Ghost in the Shell 1 3.2 3.3 0.1 1.8
Happy Birthdead 1 3.7 3.3 0.4 1.6
Hawking 1 4.2 4.3 0.1 2.0
His Dark Materials 1 4.0 2.8 1.2 1.9
Jackass 1 1.0 2.2 1.2 1.4
Le Labyrinthe 1 3.8 3.8 0.0 1.9
Le Petit Nicolas 1 2.4 2.4 0.0 1.6
Lego 1 3.8 3.0 0.8 1.2
LEGO 1 3.6 4.0 0.4 1.7
Limitless 1 5.0 3.9 1.1 1.8
Mad Max 1 4.6 4.2 0.4 2.0
Matt Groening 1 4.4 3.8 0.6 1.4
Men in Black 1 3.4 2.5 0.9 1.9
MonsterVerse 1 4.2 3.8 0.4 3.1
Mythologie 1 4.2 3.6 0.6 2.7
Ocean 1 3.0 3.0 0.0 1.8
Orelsan 1 3.2 3.4 0.2 1.5
Prince of Persia 1 4.4 3.3 1.1 1.9
Sans un bruit 1 4.3 3.8 0.6 3.1
Seuls 1 2.4 2.3 0.1 1.6
Sonic 1 3.4 3.3 0.1 1.6
Stuart Little 1 3.2 2.5 0.7 1.4
Tintin 1 4.0 3.6 0.4 1.8
Titeuf 1 3.8 3.1 0.7 2.7
Transperceneige 1 4.6 3.5 1.1 2.1
# The ten most frequent sagas; only their films are kept for the heat map.
Decade_Grade_Saga_table_top <- head(Decade_Grade_Saga_table, 10)
Decade_Grade_Saga <- Decade_Grade_Saga[
  as.character(Decade_Grade_Saga$Saga) %in%
    as.character(Decade_Grade_Saga_table_top$Saga),
]

# Heat map counting films per saga (y) and per 10-year bin (x).
Year_Grade_graph <- ggplot(Decade_Grade_Saga, aes(Année, Saga)) +
  geom_bin2d(binwidth = c(10, 1)) +
  scale_x_continuous(breaks = seq(DecadeMin_graph, DecadeMax_graph, 10)) +
  scale_fill_gradient(low = purple, high = blue) +
  labs(
    title = "Count of films per Saga and Decades",
    subtitle = paste0("from ", DecadeMin_graph, " to ", DecadeMax_graph),
    x = "Decade",
    y = "Saga"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple)
  )
Year_Grade_graph

Consommation

# Share of films per calendar month, as a percentage rounded to one decimal.
# NOTE(review): the month is taken from `Date de sortie`, while the chart
# title speaks of films "seen" per month - confirm which date is intended.
Month_table <- as.numeric(
  format(as.Date(CritiqueFilm$`Date de sortie`, format = "%Y-%m-%d"), "%m")
)
Month_table <- as.data.frame(table(Month_table))
Month_table$Freq <- round(Month_table$Freq * 100 / sum(Month_table$Freq), 1)

# Lollipop chart; the grey horizontal line marks a uniform share (100 / 12).
Month_graph <- ggplot(Month_table, aes(x = Month_table, y = Freq)) +
  geom_hline(yintercept = 100 / 12, col = grey) +
  geom_col(fill = grey, width = 0.01) +
  geom_point(size = 2, color = blue) +
  geom_text(
    aes(label = paste0(Freq, "%")),
    size = 3,
    hjust = 0.5,
    vjust = -1,
    family = "AvertaPE-Regular",
    check_overlap = TRUE
  ) +
  # The x variable is a factor with levels "1".."12", so the limits are given
  # as character values (numeric limits on a discrete scale raise a warning),
  # and `labels` is spelled out instead of relying on partial matching.
  scale_x_discrete(limits = as.character(1:12), labels = month.abb) +
  labs(
    title = "Percentage of films seen per month",
    y = "Percent", x = "Month"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Month_graph

Conclusion

# Most recently watched films
Top <- 10

# Films that have a last-viewing date, reduced to the displayed columns and
# renamed for the table.
Lastfilms <- CritiqueFilm[!is.na(CritiqueFilm$`Dernier visionnage`), ] %>%
  select(
    Title = `English Title`,
    Date = `Date de sortie`,
    Country = `Pays d'origine`,
    Director = Réalisateur,
    Grade = `Notes cummulées`,
    `Last Visio` = `Dernier visionnage`
  )

Lastfilms$`Last Visio` <- as.Date(as.POSIXct(Lastfilms$`Last Visio`))
# Release date shown as abbreviated month + year.
Lastfilms$Date <- format(as.Date(Lastfilms$Date, "%m/%d/%y"), "%b %Y")

# Newest viewing first, then keep the first `Top` rows.
Lastfilms <- head(
  Lastfilms[order(-as.numeric(Lastfilms$`Last Visio`)), ],
  Top
)
Lastfilms$Country[Lastfilms$Country == "United States of America"] <- "USA"


# HTML table with the grade cell shaded from blue to purple.
Lastfilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("l", "c", "c", "l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(5, bold = TRUE, color = white)
Title Date Country Director Grade Last Visio
The Fox and the Hound 2 nov 2006 USA Jim Kammerud 3.7 2023-03-12
The Menu nov 2022 USA Mark Mylod 7.0 2023-03-12
DC League of Super-Pets mai 2022 USA Jared Stern et Sam Levine 5.8 2023-03-12
The Whale mar 2023 USA Darren Aronofsky 8.3 2023-03-11
Lovely Bones jan 2010 New Zealand Peter Jackson 8.2 2023-03-11
Menteur jul 2022 France Olivier Baroux 4.2 2023-03-11
The Prince of Egypt déc 1998 USA Brenda Chapman 7.6 2023-03-10
Batman Begins jui 2005 USA Christopher Nolan 9.0 2023-03-09
Kick-Ass 2 aoû 2013 USA Jeff Wadlow 8.1 2023-03-07
Alibi.com fév 2017 France Philippe Lacheau 7.3 2023-03-06
library(rvest)
library(stringr)

# Blu-ray wish list: titles flagged "A acheter" in CritiqueFilm, with the
# first price and product name found on an amazon.fr search page. Results are
# cached in ToBuy.Rda and refreshed when the cache is missing or its newest
# Date is at least one day old.
tobuy_cache <- "ToBuy.Rda"

tobuy_stale <- TRUE
if (file.exists(tobuy_cache)) {
  # load() restores the cached `ToBuy` data frame into the workspace.
  load(tobuy_cache)
  tobuy_stale <- max(ToBuy$Date) <= (Sys.Date() %m-% days(1))
}

if (tobuy_stale) {
  titles <- CritiqueFilm$`Titre du film`[CritiqueFilm$`A acheter` == "A acheter"]
  titles <- titles[!is.na(titles)]

  # reserved = TRUE also escapes characters such as "&", "?" or ":" that
  # would otherwise be read as part of the query-string syntax.
  encoded_titles <- vapply(
    titles, URLencode, character(1),
    reserved = TRUE, USE.NAMES = FALSE
  )

  # Columns are created up front, in the same order as before
  # (ToBuy, Link, Price, Name, then Date).
  ToBuy <- data.frame(
    ToBuy = titles,
    Link = sprintf("https://www.amazon.fr/s?k=%s+blu-ray", encoded_titles),
    Price = rep(NA_character_, length(titles)),
    Name = rep(NA_character_, length(titles)),
    stringsAsFactors = FALSE
  )

  for (b in seq_len(nrow(ToBuy))) {
    # A failed request leaves NA for this title instead of aborting the run.
    website <- tryCatch(
      read_html(ToBuy$Link[b]),
      error = function(e) {
        warning("Could not fetch ", ToBuy$Link[b], ": ",
                conditionMessage(e), call. = FALSE)
        NULL
      }
    )
    if (!is.null(website)) {
      ToBuy$Price[b] <- html_text(html_nodes(website, ".a-price-whole"))[1]
      ToBuy$Name[b] <- html_text(html_nodes(website, ".s-line-clamp-4"))[1]
    }
    # Random pause of 0.1 to 1 second between requests.
    Sys.sleep(sample(10, 1) * 0.1)
  }

  # The scraped price uses a decimal comma.
  ToBuy$Price <- as.numeric(str_replace(ToBuy$Price, ",", "."))
  ToBuy$Date <- rep(Sys.Date(), nrow(ToBuy))
  save(ToBuy, file = tobuy_cache)
}

# Cheapest first; keep only prices in [4, 30) (missing prices are dropped).
ToBuy <- ToBuy[order(ToBuy$Price), ]
ToBuy <- ToBuy[which(ToBuy$Price >= 4 & ToBuy$Price < 30), ]
ToBuy$Price_rounded <- round(ToBuy$Price / 2) * 2

# Up to ten cheapest titles, selected by column name rather than position.
ToBuyTop <- as.data.frame(head(ToBuy[, c("ToBuy", "Price")], 10))
rownames(ToBuyTop) <- NULL

ToBuyTop %>%
  mutate(Price = color_tile(blue, purple)(Price)) %>%
  kable(escape = FALSE, align = c("l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(2, bold = TRUE, color = white)
ToBuy Price
Disney 101 - Raiponce 5.53
Sully 5.76
Disney 120 - Coco 7.00
Ready Player One 7.70
Les Affranchis 7.90
Shining 7.99
Don’t Look Up : Déni cosmique 8.80
Rocketman 8.97
Charlie et la Chocolaterie 9.30
Drunk 9.50

# Release date, grades and saga of every film released after 1985-01-01.
Year_graph_DB <- CritiqueFilm %>%
  select(`Date de sortie`, `Notes cummulées`, Grade, Saga)
Year_graph_DB <-
  Year_graph_DB[Year_graph_DB$`Date de sortie` > as.Date("1985-01-01"), ]

# Every saga outside the first five distinct sagas of Decade_Grade_Saga is
# relabelled "Trend".
Year_graph_DB$Saga[
  !Year_graph_DB$Saga %in% head(unique(Decade_Grade_Saga$Saga), 5)
] <- "Trend"

# Light grey scatter of the halved cumulative grade over release date, with
# one spline-smoothed line per saga label.
Year_graph <- ggplot(
  Year_graph_DB,
  aes(x = `Date de sortie`, y = `Notes cummulées` / 2)
) +
  geom_point(colour = "#F2F2F2") +
  geom_smooth(
    aes(group = Saga, col = Saga),
    method = lm, formula = y ~ splines::bs(x, 4), se = FALSE
  ) +
  ylim(0, 5) +
  scale_size_continuous(range = c(0.1, 0.5)) +
  scale_color_manual(values = mypal(6)) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    x = "Year",
    y = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple)
  )
Year_graph

# Embed the SVG picture stored at the path below in the rendered document.
# NOTE(review): the path is absolute and machine-specific; size and float are
# not set by this call - presumably they come from chunk options, confirm.
knitr::include_graphics("/Users/theotimebourgeois/Desktop/Graphisme/Théotime/PhotoCV.svg")

Analysis conducted by Théotime Bourgeois

Graduated with a Master of Science - Data Science & Organizational Behavior

Data Analyst in Luxembourg

Instagram LinkedIn


# Films that have an entry in the Oscar column, from YearMin_graph onwards.
# OscarTF is TRUE when that entry contains the text "Oscar".
Oscar <- NamesFilm[!is.na(NamesFilm$Oscar), ]
Oscar <- Oscar[Oscar$Année >= YearMin_graph, ]
Oscar$OscarTF <- str_detect(Oscar$Oscar, "Oscar")


# Grade per year: purple counts for all films, yellow points for the rows with
# OscarTF == TRUE, and one smoothed trend per OscarTF value.
ggplot(Oscar, aes(x = Année, y = `Notes cummulées`)) +
  geom_count(colour = purple) +
  geom_point(data = Oscar %>% filter(OscarTF == TRUE), colour = yellow) +
  scale_size("Count", range = c(1, 6)) +
  #stat_summary(aes(y = `Notes cummulées`,group = 1), fun=mean, colour=yellow,geom="line")+
  geom_smooth(
    aes(group = OscarTF, col = OscarTF),
    method = lm, formula = y ~ splines::bs(x, 2), se = FALSE
  ) +
  scale_color_manual("Winner", values = c(purple, yellow)) +
  # Each axis label is given once: labs() keeps the FIRST of duplicated
  # argument names, so the former leading y = "Number of films" overrode
  # y = "Grade" on this grade axis.
  labs(
    title = "Evolution of the scores of the films presented at the Oscars",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    y = "Grade", x = "Year"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )


# Number of films per distribution house.
Distri_Circle <- as.data.frame(table(NamesFilm$`Maison de distribution`))
colnames(Distri_Circle) <- c("Maison de distribution", "Freq")

# Add the parent company of each distribution house (merged on the shared
# `Maison de distribution` column).
Distri_Circle <- merge(
  Distri_Circle,
  unique(select(NamesFilm, `Maison de distribution`, `Maison mère`))
)

# Hierarchy table root / group / subgroup / value. Rows without a parent
# company, the groups "France" and "Autre", and houses with 10 films or fewer
# are left out.
Distri_Circle <- Distri_Circle %>%
  filter(!is.na(`Maison mère`)) %>%
  transmute(
    root = "root",
    group = `Maison mère`,
    subgroup = `Maison de distribution`,
    value = Freq
  ) %>%
  filter(!group %in% c("France", "Autre"), value > 10)


# The film count is appended to the leaf label.
Distri_Circle$subgroup <-
  paste0(Distri_Circle$subgroup, " (", Distri_Circle$value, ")")

# data.tree builds the hierarchy from a "/"-separated path string.
Distri_Circle$pathString <-
  paste("world", Distri_Circle$group, Distri_Circle$subgroup, sep = "/")
population <- as.Node(Distri_Circle)

# Make the plot
#circlepackeR(population, size = "value")

# Circle-packing widget with a custom colour range, saved as a standalone
# HTML file.
p <- circlepackeR(
  population,
  size = "value",
  color_min = "hsl(240, 31%, 25%)",
  color_max = "hsl(0, 0%, 0%)"
)
saveWidget(p, file = "circles.html")
# p
Top <- 7

# Names in the first `Top` rows of the Acteur table.
Top_Acteur <- as.character(head(Acteur$Acteur, Top))

Acteur_merge_Top <- filter(Acteur_merge, Acteur %in% Top_Acteur)

# Grade over release date for those actors, one spline-smoothed line each.
Acteur_Top_graph <-
  ggplot(
    Acteur_merge_Top,
    aes(x = `Date de sortie`, y = `Nos notes`, col = Acteur)
  ) +
  geom_point() +
  geom_smooth(
    aes(group = Acteur),
    method = lm, formula = y ~ splines::bs(x, 3), se = FALSE
  ) +
  scale_color_manual(values = mypal(Top)) +
  labs(
    title = "Count of films per Year",
    # the subtitle starts at the earliest release year among the plotted films
    subtitle = paste0(
      "from ",
      min(as.numeric(format(Acteur_merge_Top$`Date de sortie`, "%Y"))),
      " to ",
      YearMax_graph
    ),
    y = "Grade", x = "Year"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple)
  )
Acteur_Top_graph

Top <- 10

# Per actor (from the three actor columns of NamesFilm), count the graded
# films by status: "Seen", "NotSeen", or "Good" (not seen, grade >= 8). Keep
# the `Top` actors with the most films and number them 1..Top in ID.
Acteur_Proj <- rbind(
  NamesFilm %>% select(Acteur = `Acteur 1`, Seen, Grade = `Notes cummulées`),
  NamesFilm %>% select(Acteur = `Acteur 2`, Seen, Grade = `Notes cummulées`),
  NamesFilm %>% select(Acteur = `Acteur 3`, Seen, Grade = `Notes cummulées`)
) %>%
  filter(!is.na(Grade)) %>%
  mutate(
    Seen = ifelse(Seen, "Seen", ifelse(Grade >= 8, "Good", "NotSeen"))
  ) %>%
  count(Acteur, Seen, name = "Freq") %>%
  pivot_wider(names_from = Seen, values_from = Freq) %>%
  mutate(
    # a status an actor never has counts as zero
    across(c(NotSeen, Seen, Good), function(n_films) {
      ifelse(is.na(n_films), 0, n_films)
    }),
    Total = NotSeen + Good + Seen,
    TotalNotSeen = NotSeen + Good,
    SeenGood = Seen + Good
  ) %>%
  arrange(desc(Total)) %>%
  head(Top) %>%
  mutate(ID = cumsum(rep(1, n())))
## `summarise()` has grouped output by 'Acteur'. You can override using the
## `.groups` argument.
# Acteur_Proj <- rbind(
#   NamesFilm %>% select(`Acteur 1`,Seen) %>% `colnames<-`(c("Acteur","Seen")),
#   NamesFilm %>% select(`Acteur 2`,Seen) %>% `colnames<-`(c("Acteur","Seen")),
#   NamesFilm %>% select(`Acteur 3`,Seen) %>% `colnames<-`(c("Acteur","Seen"))) %>%
#   table() %>%
#   as.data.frame.matrix() %>%
#   arrange(`TRUE`) %>% 
#   arrange(desc(`FALSE`)) %>%
#   filter(`TRUE`!=0) %>% 
#   head(Top)
# 
# Acteur_Proj <- Acteur_Proj %>% 
#   mutate(Acteur = rownames(Acteur_Proj)) %>% 
#   select(Acteur, `TRUE`, `FALSE`) %>% 
#   `colnames<-`(c("Acteur","Seen","NotSeen")) %>% 
#   mutate(Total = Seen+NotSeen) %>%
#   arrange(desc(Seen)) %>% 
#   arrange(desc(Total)) %>% 
#   mutate(Acteur = fct_reorder(Acteur,Total))
# 
# Acteur_Proj$ID <- rownames(Acteur_Proj) <- 1:nrow(Acteur_Proj)

sizeActor <- 2.5

# Polar bar chart with one ring per actor (ID). Three bars are stacked from
# the longest to the shortest: all films (Total), seen + well-graded unseen
# films (SeenGood), and seen films (Seen). The centre shows the summed
# NotSeen count.
ggplot(Acteur_Proj, aes(y = ID)) +
  geom_segment(aes(x = 0, xend = Total, yend = ID, col = "Not Seen"), size = sizeActor) +
  geom_segment(aes(x = 0, xend = SeenGood, yend = ID, col = ">8"), size = sizeActor) +
  geom_segment(aes(x = 0, xend = Seen, yend = ID, col = "Seen"), size = sizeActor) +
  geom_point(aes(x = Seen, col = "Seen"), size = sizeActor - 1) +
  geom_point(aes(x = SeenGood, col = ">8"), size = sizeActor - 1) +
  geom_point(aes(x = Total, col = "Not Seen"), size = sizeActor - 1) +
  # unnamed values are matched to the sorted labels ">8", "Not Seen", "Seen"
  scale_color_manual(values = c(yellow, grey_light, purple)) +
  geom_segment(aes(x = 0, xend = 0, y = 0, yend = Top + 1)) +
  #geom_text(aes(x=Seen, label = Seen), col = white, family = "AvertaPE-Black", size = sizeActor-1)+
  #geom_text(aes(x=Total, label = Total), col = purple, family = "AvertaPE-Black", size = sizeActor-1)+
  geom_text(
    aes(x = 0, y = ID, label = Acteur),
    hjust = 1.05, size = sizeActor, family = "AvertaPE-Regular"
  ) +
  geom_richtext(
    aes(
      x = 0, y = -5,
      label = paste0(
        "<span style='font-size:20pt; color:black'> **",
        NotSeen %>% sum(),
        "**</span><br>movies to see"
      )
    ),
    size = sizeActor + 0.5, family = "AvertaPE-Regular",
    col = purple, fill = NA, label.color = NA
  ) +
  coord_polar() +
  xlim(c(0, Acteur_Proj$Total %>% max() + 5)) +
  ylim(c(-5, Top + 1)) +
  labs(
    title = "Title",
    subtitle = "Test",
    y = NULL, x = NULL,
    # caption typo fixed ("Crtitique" -> "Critique")
    caption = "Source : Critique Film",
    col = "Movies"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
## Warning in do_once((if (is_R_CMD_check()) stop else warning)("The function
## xfun::isFALSE() will be deprecated in the future. Please ", : The function
## xfun::isFALSE() will be deprecated in the future. Please consider using
## base::isFALSE(x) or identical(x, FALSE) instead.

# Films flagged DA == TRUE, from years 1991 to 2021, that belong to the
# Disney, DreamWorks or Ghibli sagas, are distributed by Illumination, or have
# Sony as parent company. The Saga label is then refined so that Pixar,
# Illumination and Sony films get their own group.
Anim_comp <- NamesFilm %>%
  filter(
    Saga %in% c("Disney", "DreamWorks", "Ghibli") |
      `Maison de distribution` == "Illumination" |
      `Maison mère` == "Sony",
    Année > 1990,
    Année <= 2021,
    DA == TRUE
  ) %>%
  mutate(
    Saga = case_when(
      `Maison de distribution` == "Pixar" ~ "Pixar",
      `Maison de distribution` == "Illumination" ~ "Illumination",
      `Maison mère` == "Sony" ~ "Sony Animation",
      TRUE ~ Saga
    )
  ) #%>%
  #filter(`Maison de distribution`!="Disney")

# Mean cumulative grade and number of films per group, best grade first.
Anim_comp %>%
  group_by(Saga) %>%
  summarise(Notes = mean(`Notes cummulées`), Freq = n()) %>%
  arrange(desc(Notes))
## # A tibble: 6 × 3
##   Saga           Notes  Freq
##   <chr>          <dbl> <int>
## 1 Pixar           8.47    23
## 2 Ghibli          7.66    20
## 3 Illumination    7.28    10
## 4 DreamWorks      7.17    41
## 5 Sony Animation  6.45    17
## 6 Disney          6.44    75
# Cumulative grade per year for each group in Anim_comp, with one
# spline-smoothed line per group and one palette colour per distinct group.
ggplot(Anim_comp, aes(x = Année, y = `Notes cummulées`, col = Saga)) +
  geom_point() +
  geom_smooth(
    aes(group = Saga),
    method = lm, formula = y ~ splines::bs(x, 3), se = FALSE
  ) +
  scale_color_manual(values = mypal(length(unique(Anim_comp$Saga)))) +
  labs(
    title = "Title",
    subtitle = "Test",
    y = "Grade",
    col = "Saga"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple)
  )

# Title / director / actor triples: the three actor columns of NamesFilm plus
# the "Acteurs" sheet of the workbook (rows with a director only), without
# duplicates.
ActeurRéalFile <- NamesFilm %>%
  select(`Titre du film`, Réalisateur, `Acteur 1`, `Acteur 2`, `Acteur 3`)
ActeurRéalRaw <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx", sheet = "Acteurs")

ActeurRéal <- rbind(
  ActeurRéalFile %>% select(1, 2, 3) %>% `colnames<-`(c("Titre", "Réalisateur", "Acteur")),
  ActeurRéalFile %>% select(1, 2, 4) %>% `colnames<-`(c("Titre", "Réalisateur", "Acteur")),
  ActeurRéalFile %>% select(1, 2, 5) %>% `colnames<-`(c("Titre", "Réalisateur", "Acteur")),
  ActeurRéalRaw %>%
    select(`Titre du film`, Réalisateur, `Acteur`) %>%
    filter(!is.na(Réalisateur)) %>%
    `colnames<-`(c("Titre", "Réalisateur", "Acteur"))
) %>%
  unique()

# Director / actor pairs that occur more than 3 times.
ActeurRéal_table <- ActeurRéal %>%
  select(Réalisateur, Acteur) %>%
  table() %>%
  as.data.frame() %>%
  arrange(desc(Freq)) %>%
  filter(Freq > 3)

# Overall number of rows per actor and number of distinct titles per director,
# limited to the people that appear in the pairs above.
ActeurFreq <- ActeurRéal %>%
  select(Acteur) %>%
  table() %>%
  as.data.frame() %>%
  filter(Acteur %in% ActeurRéal_table$Acteur)
RéalFreq <- ActeurRéal %>%
  select(Titre, Réalisateur) %>%
  unique() %>%
  select(Réalisateur) %>%
  table() %>%
  as.data.frame() %>%
  filter(Réalisateur %in% ActeurRéal_table$Réalisateur)

# Share of each pair in the actor's and in the director's films.
# The Freq columns are renamed before merging and the result is selected by
# name. The former positional `colnames<-` after the second merge labelled the
# key column (Réalisateur, which merge() puts first) as "Acteur" and the actor
# column as "Réalisateur".
# NOTE(review): with the labels corrected, the sunburst below is built as
# actor -> director, as its `path` expression states; confirm this is the
# intended hierarchy.
ActeurRéal_table_stat <- ActeurRéal_table %>%
  merge(ActeurFreq %>% rename(FreqActeur = Freq), by = "Acteur") %>%
  merge(RéalFreq %>% rename(FreqRéal = Freq), by = "Réalisateur") %>%
  select(Acteur, Réalisateur, Freq, FreqActeur, FreqRéal) %>%
  mutate(
    StatActeur = (100 * Freq / FreqActeur) %>% round(1),
    StatRéal = (100 * Freq / FreqRéal) %>% round(1)
  ) %>%
  arrange(desc(StatRéal))

# "-" separates the levels in the path built below, so hyphens inside names
# are replaced by "~".
ActeurRéal_table_stat$Acteur <- ActeurRéal_table_stat$Acteur %>% str_replace_all("-", "~")
ActeurRéal_table_stat$Réalisateur <- ActeurRéal_table_stat$Réalisateur %>% str_replace_all("-", "~")

data <- ActeurRéal_table_stat %>%
  mutate(path = paste0(Acteur, "-", Réalisateur)) %>%
  select(path, Freq)

colfunc <- colorRampPalette(allcols)

library(treemap)
library(sunburstR)

sunburst(data, legend = FALSE, colors = colfunc(43), sumNodes = TRUE)
Legend
# Number of films per year of last viewing, with the running total in Sum.
ObjectifYear <- CritiqueFilm$`Dernier visionnage` %>%
  format("%Y") %>%
  as.numeric() %>%
  table() %>%
  as.data.frame() %>%
  `colnames<-`(c("Année", "Freq")) %>%
  mutate(Année = as.numeric(as.character(Année)),
         Sum = cumsum(Freq))

NYear <- 50

Age <- time_length(interval("1998-12-11", Beginning %m+% years(NYear)), "years") %>% round() %>% paste("years")

# Extend the table to one row per year, from the first year with data to that
# year + NYear rounded to the nearest ten; years without data get NA.
first_year <- min(ObjectifYear$Année)
ObjectifYear <- data.frame(
  Année = seq(first_year, round((first_year + NYear) / 10) * 10, by = 1)
) %>%
  merge(ObjectifYear, by = "Année", all = TRUE)

# ObjectifYear$Sum[ObjectifYear$Année==(ObjectifYear$Année %>% max())] <- 10000

# Target total and the three intermediate thresholds (each half of the next).
ObjectifYearMax <- 10000
ObjectifYearMax3 <- ObjectifYearMax/2
ObjectifYearMax2 <- ObjectifYearMax3/2
ObjectifYearMax1 <- ObjectifYearMax2/2

# Cumulative films per year against the target. The coloured band left of the
# first year marks the four tiers. The band, its labels and the target line
# are constants, so they are drawn with annotate(): this draws them once
# instead of once per data row and removes the "Use of `ObjectifYear$Année` is
# discouraged" warnings. Label positions follow the band (first_year - 2)
# instead of the former hard-coded 2018.
ggplot(ObjectifYear, aes(x = Année, y = Sum)) +
  annotate(
    "rect",
    xmin = first_year - 2,
    xmax = first_year - 1,
    ymin = c(0, ObjectifYearMax1, ObjectifYearMax2, ObjectifYearMax3),
    ymax = c(ObjectifYearMax1, ObjectifYearMax2, ObjectifYearMax3, ObjectifYearMax),
    fill = c("#FD6248", "#BFBFBF", yellow, "#4696FF")
  ) +
  # geom_hline(yintercept=ObjectifYearMax3, col=yellow)+
  # geom_hline(yintercept=ObjectifYearMax2, col="#BFBFBF")+
  # geom_hline(yintercept=ObjectifYearMax1, col="#FD6248")+
  annotate(
    "text",
    x = first_year - 2,
    y = c(ObjectifYearMax3, ObjectifYearMax2, ObjectifYearMax1, ObjectifYearMax),
    label = c("Gold", "Silver", "Bronze", "Platinum"),
    col = white, hjust = 1.1, vjust = -0.1, size = 3, angle = 90,
    family = "AvertaPE-Black"
  ) +
  # straight line from zero in the first year to the target in the last year
  annotate(
    "segment",
    x = first_year,
    xend = max(ObjectifYear$Année),
    y = 0,
    yend = ObjectifYearMax,
    size = .5,
    col = yellow
  ) +
  # linear extrapolation of the observed running total
  geom_smooth(method = lm, fullrange = TRUE, se = FALSE, size = .5, col = grey) +
  geom_col(fill = purple) +
  geom_text(aes(label = Sum), angle = 90, y = 0, hjust = -0.1, col = white, size = 2) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "Title",
       subtitle = "Test",
       y = "Grade",
       col = "Saga") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        plot.margin = unit(c(0.2, 1, 0.2, 0.2), "cm"),
        panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.title = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
## Warning: Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## Use of `ObjectifYear$Année` is discouraged.
## ℹ Use `Année` instead.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 47 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 47 rows containing missing values (`position_stack()`).
## Warning: Removed 47 rows containing missing values (`geom_text()`).